From 5a392951e7b84b175f3010f93abe09e58d782c27 Mon Sep 17 00:00:00 2001
From: Christian Heller <c.heller@plomlompom.de>
Date: Wed, 20 Nov 2024 13:56:50 +0100
Subject: [PATCH] Add "files" table to store what files to expect in downloads
 directory.

---
 ytplom.py | 111 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 75 insertions(+), 36 deletions(-)

diff --git a/ytplom.py b/ytplom.py
index d3f3f2a..b3cd23d 100755
--- a/ytplom.py
+++ b/ytplom.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """Minimalistic download-focused YouTube interface."""
 from typing import TypeAlias, Optional, NewType, Callable, Self, Any
-from os import environ, makedirs, scandir, remove as os_remove
+from os import chdir, environ, getcwd, makedirs, scandir, remove as os_remove
 from os.path import (isdir, isfile, exists as path_exists, join as path_join,
                      splitext, basename)
 from random import shuffle
@@ -25,7 +25,7 @@ HTTP_PORT = 8084
 
 DatetimeStr = NewType('DatetimeStr', str)
 QuotaCost = NewType('QuotaCost', int)
-VideoId = NewType('VideoId', str)
+YoutubeId = NewType('YoutubeId', str)
 PathStr = NewType('PathStr', str)
 QueryId = NewType('QueryId', int)
 QueryText = NewType('QueryText', str)
@@ -33,11 +33,11 @@ ProseText = NewType('ProseText', str)
 SqlText = NewType('SqlText', str)
 AmountDownloads = NewType('AmountDownloads', int)
 PlayerUpdateId = NewType('PlayerUpdateId', str)
-DownloadsIndex: TypeAlias = dict[VideoId, PathStr]
+DownloadsIndex: TypeAlias = dict[YoutubeId, PathStr]
 TemplateContext: TypeAlias = dict[
-        str, None | bool | PlayerUpdateId | Optional[PathStr] | VideoId
-        | QueryText | QuotaCost | 'VideoData' | list['VideoData']
-        | list['QueryData'] | list[tuple[VideoId, PathStr]]
+        str, None | bool | PlayerUpdateId | Optional[PathStr] | YoutubeId
+        | QueryText | QuotaCost | 'YoutubeVideo' | list['YoutubeVideo']
+        | list['QueryData'] | list[tuple[YoutubeId, PathStr]]
         | list[tuple[PathStr, PathStr]]]
 
 
@@ -100,6 +100,11 @@ CREATE TABLE quota_costs (
   timestamp TEXT NOT NULL,
   cost INT NOT NULL
 );
+CREATE TABLE files (
+  rel_path TEXT PRIMARY KEY,
+  yt_id TEXT NOT NULL DEFAULT "",
+  FOREIGN KEY (yt_id) REFERENCES yt_videos(id)
+);
 '''
 
 
@@ -194,9 +199,9 @@ class QueryData(DbData):
     @classmethod
     def get_all_for_video(cls,
                           conn: DatabaseConnection,
-                          video_id: VideoId
+                          video_id: YoutubeId
                           ) -> list[Self]:
-        """Return all QueryData that got VideoData of video_id as result."""
+        """Return all QueryData that got YoutubeVideo of video_id as result."""
         sql = SqlText('SELECT query_id FROM '
                       'yt_query_results WHERE video_id = ?')
         query_ids = conn.exec(sql, (video_id,)).fetchall()
@@ -204,14 +209,14 @@ class QueryData(DbData):
                 for query_id_tup in query_ids]
 
 
-class VideoData(DbData):
+class YoutubeVideo(DbData):
     """Representation of YouTube video metadata as provided by their API."""
     _table_name = 'yt_videos'
     _cols = ('id_', 'title', 'description', 'published_at', 'duration',
              'definition')
 
     def __init__(self,
-                 id_: VideoId,
+                 id_: YoutubeId,
                  title: ProseText = ProseText('?'),
                  description: ProseText = ProseText('?'),
                  published_at: DatetimeStr = DatetimeStr('?'),
@@ -269,6 +274,25 @@ class VideoData(DbData):
                   (query_id, self.id_))
 
 
+class VideoFile(DbData):
+    """Collects data about downloaded files.
+
+    One instance maps to one row of the "files" table: rel_path is the
+    row's primary key, yt_id the ID of the YouTube video it belongs to.
+    """
+    _table_name = 'files'
+    _cols = ('rel_path', 'yt_id')
+
+    def __init__(self, rel_path: PathStr, yt_id: YoutubeId) -> None:
+        self.rel_path = rel_path
+        self.yt_id = yt_id
+
+    def remove(self, conn: DatabaseConnection) -> None:
+        """Remove self from database by self.rel_path as identifier."""
+        sql = SqlText(f'DELETE FROM {self._table_name} WHERE rel_path = ?')
+        conn.exec(sql, (self.rel_path,))
+
+
 class QuotaLog(DbData):
     """Collects API access quota costs."""
     _table_name = 'quota_costs'
@@ -424,29 +444,59 @@ class DownloadsDb:
     """Collections downloading-related stuff."""
 
     def __init__(self) -> None:
-        self._to_download: list[VideoId] = []
+        self._to_download: list[YoutubeId] = []
         _ensure_expected_dirs([PATH_DIR_DOWNLOADS, PATH_DIR_TEMP])
+        self._sync_db()
+
+    def _sync_db(self) -> None:
+        """Reconcile "files" table with PATH_DIR_DOWNLOADS, set self._files.
+
+        Drops table entries whose file is gone, adds entries for files not
+        yet listed (YT ID guessed from filename), then re-reads the table.
+        """
+        conn = DatabaseConnection()
+        files_via_db = VideoFile.get_all(conn)
+        old_cwd = getcwd()
+        # chdir so scandir() yields paths relative to downloads directory;
+        # try/finally so an error cannot leave the process stranded there
+        chdir(PATH_DIR_DOWNLOADS)
+        try:
+            for file in files_via_db:
+                if not isfile(file.rel_path):
+                    print(f'SYNC: no file {file.rel_path} found, '
+                          'removing entry.')
+                    file.remove(conn)
+            known_paths = {file.rel_path for file in files_via_db}
+            for path in [PathStr(e.path) for e in scandir() if isfile(e.path)]:
+                if path not in known_paths:
+                    yt_id = self._id_from_filename(path)
+                    file = VideoFile(path, yt_id)
+                    print(f'SYNC: new file {path}, '
+                          f'saving with YT ID "{yt_id}".')
+                    file.save(conn)
+        finally:
+            chdir(old_cwd)
+        self._files = VideoFile.get_all(conn)
+        conn.commit_close()
 
     @staticmethod
     def _id_from_filename(path: PathStr,
                           double_split: bool = False
-                          ) -> VideoId:
+                          ) -> YoutubeId:
         before_ext = splitext(path)[0]
         if double_split:
             before_ext = splitext(before_ext)[0]
-        return VideoId(before_ext.split('[')[-1].split(']')[0])
+        return YoutubeId(before_ext.split('[')[-1].split(']')[0])
 
     @property
     def ids_to_paths(self) -> DownloadsIndex:
-        """Return mapping of VideoIds to paths of files downloaded to them."""
-        ids_to_paths = {}
-        for path in [PathStr(e.path) for e
-                     in scandir(PATH_DIR_DOWNLOADS) if isfile(e.path)]:
-            ids_to_paths[self._id_from_filename(path)] = PathStr(path)
-        return ids_to_paths
+        """Return mapping YoutubeIds:paths of files downloaded to them."""
+        self._sync_db()
+        return {f.yt_id: PathStr(path_join(PATH_DIR_DOWNLOADS, f.rel_path))
+                for f in self._files}
 
     @property
-    def ids_unfinished(self) -> set[VideoId]:
+    def ids_unfinished(self) -> set[YoutubeId]:
         """Return set of IDs of videos awaiting or currently in download."""
         in_temp_dir = []
         for path in [PathStr(e.path) for e
@@ -460,7 +499,7 @@ class DownloadsDb:
             print(f'removing unfinished download: {e.path}')
             os_remove(e.path)
 
-    def queue_download(self, video_id: VideoId) -> None:
+    def queue_download(self, video_id: YoutubeId) -> None:
         """Add video_id to download queue *if* not already processed."""
         pre_existing = self.ids_unfinished | set(self._to_download
                                                  + list(self.ids_to_paths))
@@ -537,7 +576,7 @@ class TaskHandler(BaseHTTPRequestHandler):
     def _post_query(self, query_txt: QueryText) -> None:
         conn = DatabaseConnection()
 
-        def collect_results(query_txt: QueryText) -> list[VideoData]:
+        def collect_results(query_txt: QueryText) -> list[YoutubeVideo]:
             youtube = googleapiclient.discovery.build('youtube', 'v3',
                                                       developerKey=API_KEY)
             QuotaLog.update(conn, QUOTA_COST_YOUTUBE_SEARCH)
@@ -547,18 +586,18 @@ class TaskHandler(BaseHTTPRequestHandler):
                     maxResults=25,
                     safeSearch='none',
                     type='video')
-            results: list[VideoData] = []
-            ids_to_detail: list[VideoId] = []
+            results: list[YoutubeVideo] = []
+            ids_to_detail: list[YoutubeId] = []
             for item in search_request.execute()['items']:
-                video_id: VideoId = item['id']['videoId']
+                video_id: YoutubeId = item['id']['videoId']
                 ids_to_detail += [video_id]
                 snippet = item['snippet']
                 urlretrieve(snippet['thumbnails']['default']['url'],
                             path_join(PATH_DIR_THUMBNAILS, f'{video_id}.jpg'))
-                results += [VideoData(id_=video_id,
-                                      title=snippet['title'],
-                                      description=snippet['description'],
-                                      published_at=snippet['publishedAt'])]
+                results += [YoutubeVideo(id_=video_id,
+                                         title=snippet['title'],
+                                         description=snippet['description'],
+                                         published_at=snippet['publishedAt'])]
             QuotaLog.update(conn, QUOTA_COST_YOUTUBE_DETAILS)
             ids_for_details = ','.join([r.id_ for r in results])
             videos_request = youtube.videos().list(id=ids_for_details,
@@ -592,11 +631,11 @@ class TaskHandler(BaseHTTPRequestHandler):
             if 'thumbnails' == page_name:
                 self._send_thumbnail(PathStr(toks_url[2]))
             elif 'dl' == page_name:
-                self._send_or_download_video(VideoId(toks_url[2]))
+                self._send_or_download_video(YoutubeId(toks_url[2]))
             elif 'videos' == page_name:
                 self._send_videos_index()
             elif 'video_about' == page_name:
-                self._send_video_about(VideoId(toks_url[2]))
+                self._send_video_about(YoutubeId(toks_url[2]))
             elif 'query' == page_name:
                 self._send_query_page(QueryId(int(toks_url[2])))
             elif 'queries' == page_name:
@@ -636,7 +675,7 @@ class TaskHandler(BaseHTTPRequestHandler):
             img = f.read()
         self._send_http(img, [('Content-type', 'image/jpg')])
 
-    def _send_or_download_video(self, video_id: VideoId) -> None:
+    def _send_or_download_video(self, video_id: YoutubeId) -> None:
         if video_id in self.server.downloads.ids_to_paths:
             with open(self.server.downloads.ids_to_paths[video_id],
                       'rb') as video_file:
@@ -650,7 +689,7 @@ class TaskHandler(BaseHTTPRequestHandler):
     def _send_query_page(self, query_id: QueryId) -> None:
         conn = DatabaseConnection()
         query = QueryData.get_one(conn, str(query_id))
-        results = VideoData.get_all_for_query(conn, query_id)
+        results = YoutubeVideo.get_all_for_query(conn, query_id)
         conn.commit_close()
         self._send_rendered_template(
                 NAME_TEMPLATE_RESULTS,
@@ -666,13 +705,13 @@ class TaskHandler(BaseHTTPRequestHandler):
                 NAME_TEMPLATE_QUERIES, {'queries': queries_data,
                                         'quota_count': quota_count})
 
-    def _send_video_about(self, video_id: VideoId) -> None:
+    def _send_video_about(self, video_id: YoutubeId) -> None:
         conn = DatabaseConnection()
         linked_queries = QueryData.get_all_for_video(conn, video_id)
         try:
-            video_data = VideoData.get_one(conn, video_id)
+            video_data = YoutubeVideo.get_one(conn, video_id)
         except NotFoundException:
-            video_data = VideoData(video_id)
+            video_data = YoutubeVideo(video_id)
         conn.commit_close()
         self._send_rendered_template(
                 NAME_TEMPLATE_VIDEO_ABOUT,
-- 
2.30.2