From: Christian Heller <c.heller@plomlompom.de>
Date: Sun, 1 Dec 2024 07:42:28 +0000 (+0100)
Subject: Turn files.sha512_digest into new primary key, get rid of .rel_path base64-encoding... 
X-Git-Url: https://plomlompom.com/repos/%7B%7Bdb.prefix%7D%7D/static/%7B%7Bprefix%7D%7D/day?a=commitdiff_plain;h=716ab0d4ff7ed7704684a2d337df46e6d9ac93b6;p=ytplom

Turn files.sha512_digest into new primary key, get rid of .rel_path base64-encoding shenanigans.
---

diff --git a/src/migrate.py b/src/migrate.py
index fc63965..e1ba4de 100755
--- a/src/migrate.py
+++ b/src/migrate.py
@@ -47,8 +47,7 @@ def main() -> None:
             msg_apply_prefix = f'Applying migration {version}: '
             for path in [p for p in sorted_paths if _SUFFIX_SQL == p.suffix]:
                 print(f'{msg_apply_prefix}{path}')
-                sql = SqlText(path.read_text(encoding='utf8'))
-                conn.exec(sql)
+                conn.exec_script(path)
             for path in [p for p in sorted_paths if _SUFFIX_PY == p.suffix]:
                 spec = spec_from_file_location(str(path), path)
                 assert spec is not None
diff --git a/src/migrations/2_add_files_sha512.py b/src/migrations/2_add_files_sha512.py
index 329286f..0e10011 100644
--- a/src/migrations/2_add_files_sha512.py
+++ b/src/migrations/2_add_files_sha512.py
@@ -22,7 +22,7 @@ def migrate(conn: DbConn) -> None:
                       (str(f.rel_path),))
     for file in VideoFile.get_all(conn):
         print(f'Calculating digest for: {file.rel_path}')
-        with open(file.full_path, 'rb') as x:
+        with open(file.full_path, 'rb') as f:
             file.sha512_digest = HashStr(
-                    file_digest(x, 'sha512').hexdigest())
+                    file_digest(f, 'sha512').hexdigest())
         file.save(conn)
diff --git a/src/migrations/3_files_redo.sql b/src/migrations/3_files_redo.sql
new file mode 100644
index 0000000..bf485bb
--- /dev/null
+++ b/src/migrations/3_files_redo.sql
@@ -0,0 +1,32 @@
+CREATE TEMPORARY TABLE files_backup (  -- step 1: scratch copy of files in its old, rel_path-keyed layout
+  rel_path TEXT PRIMARY KEY,
+  yt_id TEXT NOT NULL DEFAULT '',  -- single quotes: double quotes denote identifiers in SQL
+  flags INTEGER NOT NULL DEFAULT 0,
+  last_update TEXT NOT NULL DEFAULT '2000-01-01 12:00:00.123456',
+  sha512_digest TEXT NOT NULL,
+  FOREIGN KEY (yt_id) REFERENCES yt_videos(id)  -- NOTE(review): parent is not a temp table; verify this is harmless if foreign_keys is ON
+);
+INSERT INTO files_backup SELECT  -- all five columns are supplied, so the defaults above never apply
+  rel_path,
+  yt_id,
+  flags,
+  last_update,
+  sha512_digest
+FROM files;
+DROP TABLE files;  -- step 2: remove the old table
+CREATE TABLE files (  -- step 3: recreate it with sha512_digest as primary key
+  sha512_digest TEXT PRIMARY KEY,
+  rel_path TEXT NOT NULL,  -- no longer the key
+  flags INTEGER NOT NULL DEFAULT 0,
+  yt_id TEXT,  -- now nullable, without default
+  last_update TEXT NOT NULL,  -- now without default
+  FOREIGN KEY (yt_id) REFERENCES yt_videos(id)
+);
+INSERT INTO files SELECT  -- step 4: copy rows back in the new column order; rows sharing a digest would collide on the key
+  sha512_digest,
+  rel_path,
+  flags,
+  yt_id,
+  last_update
+FROM files_backup;  -- NOTE(review): old '' yt_id values are copied as-is, not turned into NULL; confirm intended
+DROP TABLE files_backup;  -- step 5: discard the scratch copy
diff --git a/src/migrations/init_2.sql b/src/migrations/init_2.sql
deleted file mode 100644
index aaa866b..0000000
--- a/src/migrations/init_2.sql
+++ /dev/null
@@ -1,33 +0,0 @@
-CREATE TABLE yt_queries (
-  id TEXT PRIMARY KEY,
-  text TEXT NOT NULL,
-  retrieved_at TEXT NOT NULL
-);
-CREATE TABLE yt_videos (
-  id TEXT PRIMARY KEY,
-  title TEXT NOT NULL,
-  description TEXT NOT NULL,
-  published_at TEXT NOT NULL,
-  duration TEXT NOT NULL,
-  definition TEXT NOT NULL
-);
-CREATE TABLE yt_query_results (
-  query_id TEXT NOT NULL,
-  video_id TEXT NOT NULL,
-  PRIMARY KEY (query_id, video_id),
-  FOREIGN KEY (query_id) REFERENCES yt_queries(id),
-  FOREIGN KEY (video_id) REFERENCES yt_videos(id)
-);
-CREATE TABLE quota_costs (
-  id TEXT PRIMARY KEY,
-  timestamp TEXT NOT NULL,
-  cost INT NOT NULL
-);
-CREATE TABLE files (
-  rel_path TEXT PRIMARY KEY,
-  yt_id TEXT NOT NULL DEFAULT "",
-  flags INTEGER NOT NULL DEFAULT 0,
-  last_update TEXT NOT NULL DEFAULT "2000-01-01 12:00:00.123456",
-  sha512_digest TEXT NOT NULL DEFAULT "",
-  FOREIGN KEY (yt_id) REFERENCES yt_videos(id)
-);
diff --git a/src/migrations/init_3.sql b/src/migrations/init_3.sql
new file mode 100644
index 0000000..d223bef
--- /dev/null
+++ b/src/migrations/init_3.sql
@@ -0,0 +1,33 @@
+CREATE TABLE yt_queries (  -- schema for DB version 3; first table: stored queries
+  id TEXT PRIMARY KEY,  -- referenced by yt_query_results.query_id
+  text TEXT NOT NULL,  -- presumably the query string as entered; confirm
+  retrieved_at TEXT NOT NULL  -- timestamp kept as text; format is not fixed here
+);
+CREATE TABLE yt_videos (  -- per-video metadata, every column stored as text
+  id TEXT PRIMARY KEY,  -- referenced by yt_query_results.video_id and files.yt_id
+  title TEXT NOT NULL,
+  description TEXT NOT NULL,
+  published_at TEXT NOT NULL,
+  duration TEXT NOT NULL,
+  definition TEXT NOT NULL
+);
+CREATE TABLE yt_query_results (  -- many-to-many link between queries and videos
+  query_id TEXT NOT NULL,
+  video_id TEXT NOT NULL,
+  PRIMARY KEY (query_id, video_id),  -- each pairing is stored at most once
+  FOREIGN KEY (query_id) REFERENCES yt_queries(id),
+  FOREIGN KEY (video_id) REFERENCES yt_videos(id)
+);
+CREATE TABLE quota_costs (  -- presumably a log of API quota spent; confirm
+  id TEXT PRIMARY KEY,
+  timestamp TEXT NOT NULL,
+  cost INT NOT NULL
+);
+CREATE TABLE files (  -- file records; columns are in the same order as VideoFile._cols
+  sha512_digest TEXT PRIMARY KEY,  -- a file is identified by its content digest
+  rel_path TEXT NOT NULL,
+  flags INTEGER NOT NULL DEFAULT 0,  -- presumably a bit field; 0 = nothing set
+  yt_id TEXT,  -- nullable: a file need not reference a yt_videos row
+  last_update TEXT NOT NULL,
+  FOREIGN KEY (yt_id) REFERENCES yt_videos(id)
+);
diff --git a/src/templates/file_data.tmpl b/src/templates/file_data.tmpl
index d0e606d..589ef26 100644
--- a/src/templates/file_data.tmpl
+++ b/src/templates/file_data.tmpl
@@ -8,7 +8,7 @@
 <tr><th>YouTube ID:</th><td><a href="/{{page_names.yt_result}}/{{file.yt_id}}">{{file.yt_id}}</a></tr>
 <tr><th>present:</th><td>{% if file.present %}<a href="/{{page_names.download}}/{{file.yt_id}}">yes</a>{% else %}no{% endif %}</td></tr>
 </table>
-<form action="/{{page_names.file}}/{{file.rel_path_b64}}" method="POST" />
+<form action="/{{page_names.file}}/{{file.sha512_digest}}" method="POST" />
 {% for flag_name in flag_names %}
 {{ flag_name }}: <input type="checkbox" name="{{flag_name}}" {% if file.is_flag_set(flag_name) %}checked {% endif %} /><br />
 {% endfor %}
diff --git a/src/templates/files.tmpl b/src/templates/files.tmpl
index 2d58c5b..f2a8024 100644
--- a/src/templates/files.tmpl
+++ b/src/templates/files.tmpl
@@ -15,8 +15,8 @@ show absent: <input type="checkbox" name="show_absent" {% if show_absent %}check
 {% for file in files %}
 <tr>
 <td>{{ file.size | round(3) }}</td>
-<td><input type="submit" name="play_{{file.rel_path_b64}}" value="play" {% if not file.present %}disabled {% endif %}/></td>
-<td><a href="/{{page_names.file}}/{{file.rel_path_b64}}">{{file.rel_path}}</a></td>
+<td><input type="submit" name="play_{{file.sha512_digest}}" value="play" {% if not file.present %}disabled {% endif %}/></td>
+<td><a href="/{{page_names.file}}/{{file.sha512_digest}}">{{file.rel_path}}</a></td>
 </tr>
 {% endfor %}
 </table>
diff --git a/src/templates/playlist.tmpl b/src/templates/playlist.tmpl
index bbf441e..494a7a1 100644
--- a/src/templates/playlist.tmpl
+++ b/src/templates/playlist.tmpl
@@ -48,7 +48,7 @@ td.entry_buttons { width: 5em; }
 <input type="submit" name="up_{{idx}}" value="{% if reverse %}v{% else %}^{% endif %}" />
 <input type="submit" name="down_{{idx}}" value="{% if reverse %}^{% else %}v{% endif %}" />
 </td>
-<td><a href="/{{page_names.file}}/{{file.rel_path_b64}}">{{ file.rel_path }}</a></td>
+<td><a href="/{{page_names.file}}/{{file.sha512_digest}}">{{ file.rel_path }}</a></td>
 </tr>
 {% endfor %}
 </table>
@@ -61,7 +61,7 @@ td.entry_buttons { width: 5em; }
 <table>
 <tr><td id="status" colspan=2>
 {% if running %}{% if pause %}PAUSED{% else %}PLAYING{% endif %}{% else %}STOPPED{% endif %}:<br />
-<a href="/{{page_names.file}}/{{current_video.rel_path_b64}}">{{ current_video.rel_path }}</a><br />
+<a href="/{{page_names.file}}/{{current_video.sha512_digest}}">{{ current_video.rel_path }}</a><br />
 <form action="/{{page_names.playlist}}" method="POST">
 <input type="submit" name="pause" autofocus value="{% if paused %}resume{% else %}pause{% endif %}">
 <input type="submit" name="prev" value="prev">
diff --git a/src/ytplom/http.py b/src/ytplom/http.py
index 24c40b3..29f06ee 100644
--- a/src/ytplom/http.py
+++ b/src/ytplom/http.py
@@ -10,7 +10,7 @@ from urllib.error import HTTPError
 from jinja2 import (  # type: ignore
         Environment as JinjaEnv, FileSystemLoader as JinjaFSLoader)
 from ytplom.misc import (
-        B64Str, FilesWithIndex, FlagName, NotFoundException, PlayerUpdateId,
+        HashStr, FilesWithIndex, FlagName, NotFoundException, PlayerUpdateId,
         QueryId, QueryText, QuotaCost, UrlStr, YoutubeId,
         FILE_FLAGS, PATH_APP_DATA, PATH_THUMBNAILS, YOUTUBE_URL_PREFIX,
         ensure_expected_dirs,
@@ -104,7 +104,7 @@ class _TaskHandler(BaseHTTPRequestHandler):
         if PAGE_NAMES['files'] == page_name:
             self._receive_files_command(list(postvars.keys())[0])
         elif PAGE_NAMES['file'] == page_name:
-            self._receive_video_flag(B64Str(toks_url[2]),
+            self._receive_video_flag(HashStr(toks_url[2]),
                                      [FlagName(k) for k in postvars])
         elif PAGE_NAMES['yt_queries'] == page_name:
             self._receive_yt_query(QueryText(postvars['query'][0]))
@@ -132,24 +132,24 @@ class _TaskHandler(BaseHTTPRequestHandler):
     def _receive_files_command(self, command: str) -> None:
         if command.startswith('play_'):
             with DbConn() as conn:
-                file = VideoFile.get_by_b64(conn,
-                                            B64Str(command.split('_', 1)[1]))
+                file = VideoFile.get_one(conn,
+                                         HashStr(command.split('_', 1)[1]))
             self.server.player.inject_and_play(file)
         self._redirect(Path('/'))
 
     def _receive_video_flag(self,
-                            rel_path_b64: B64Str,
+                            sha512_digest: HashStr,
                             flag_names: list[FlagName]
                             ) -> None:
         with DbConn() as conn:
-            file = VideoFile.get_by_b64(conn, rel_path_b64)
+            file = VideoFile.get_one(conn, sha512_digest)
             file.set_flags([FILE_FLAGS[name] for name in flag_names])
             file.save(conn)
             conn.commit()
         file.ensure_absence_if_deleted()
         self._redirect(Path('/')
                        .joinpath(PAGE_NAMES['file'])
-                       .joinpath(rel_path_b64))
+                       .joinpath(sha512_digest))
 
     def _receive_yt_query(self, query_txt: QueryText) -> None:
         with DbConn() as conn:
@@ -176,7 +176,7 @@ class _TaskHandler(BaseHTTPRequestHandler):
                 show_absent = params.get('show_absent', [False])[0]
                 self._send_files_index(filter_, bool(show_absent))
             elif PAGE_NAMES['file'] == page_name:
-                self._send_file_data(B64Str(toks_url[2]))
+                self._send_file_data(HashStr(toks_url[2]))
             elif PAGE_NAMES['yt_result'] == page_name:
                 self._send_yt_result(YoutubeId(toks_url[2]))
             elif PAGE_NAMES['missing'] == page_name:
@@ -269,9 +269,9 @@ class _TaskHandler(BaseHTTPRequestHandler):
                  'youtube_prefix': YOUTUBE_URL_PREFIX,
                  'queries': linked_queries})
 
-    def _send_file_data(self, rel_path_b64: B64Str) -> None:
+    def _send_file_data(self, sha512_digest: HashStr) -> None:
         with DbConn() as conn:
-            file = VideoFile.get_by_b64(conn, rel_path_b64)
+            file = VideoFile.get_one(conn, sha512_digest)
         self._send_rendered_template(
                 _NAME_TEMPLATE_FILE_DATA,
                 {'file': file, 'flag_names': list(FILE_FLAGS)})
diff --git a/src/ytplom/misc.py b/src/ytplom/misc.py
index 512c755..805f22f 100644
--- a/src/ytplom/misc.py
+++ b/src/ytplom/misc.py
@@ -3,7 +3,6 @@
 # included libs
 from typing import Any, Literal, NewType, Optional, Self, TypeAlias
 from os import chdir, environ
-from base64 import urlsafe_b64encode, urlsafe_b64decode
 from hashlib import file_digest
 from random import shuffle
 from time import time, sleep
@@ -40,7 +39,6 @@ FlagsInt = NewType('FlagsInt', int)
 HashStr = NewType('HashStr', str)
 AmountDownloads = NewType('AmountDownloads', int)
 PlayerUpdateId = NewType('PlayerUpdateId', str)
-B64Str = NewType('B64Str', str)
 UrlStr = NewType('UrlStr', str)
 FilesWithIndex: TypeAlias = list[tuple[int, 'VideoFile']]
 
@@ -68,7 +66,7 @@ QUOTA_COST_YOUTUBE_SEARCH = QuotaCost(100)
 QUOTA_COST_YOUTUBE_DETAILS = QuotaCost(1)
 
 # database stuff
-EXPECTED_DB_VERSION = 2
+EXPECTED_DB_VERSION = 3
 SQL_DB_VERSION = SqlText('PRAGMA user_version')
 PATH_MIGRATIONS = PATH_APP_DATA.joinpath('migrations')
 PATH_DB_SCHEMA = PATH_MIGRATIONS.joinpath(f'init_{EXPECTED_DB_VERSION}.sql')
@@ -167,6 +165,10 @@ class DbConn:
         """Wrapper around sqlite3.Connection.execute."""
         return self._conn.execute(sql, inputs)
 
+    def exec_script(self, path: Path) -> None:
+        """Read file at path as UTF-8, run it via Connection.executescript."""
+        self._conn.executescript(path.read_text(encoding='utf8'))
+
     def commit(self) -> None:
         """Commit changes (i.e. DbData.save() calls) to database."""
         self._conn.commit()
@@ -365,15 +367,15 @@ class YoutubeVideo(DbData):
 
 class VideoFile(DbData):
     """Collects data about downloaded files."""
-    id_name = 'rel_path'
+    id_name = 'sha512_digest'
     _table_name = 'files'
-    _cols = ('rel_path', 'yt_id', 'flags', 'last_update', 'sha512_digest')
+    _cols = ('sha512_digest', 'rel_path', 'flags', 'yt_id', 'last_update')
     last_update: DatetimeStr
     rel_path: Path
 
     def __init__(self,
                  rel_path: Path,
-                 yt_id: YoutubeId,
+                 yt_id: Optional[YoutubeId] = None,
                  flags: FlagsInt = FlagsInt(0),
                  last_update: Optional[DatetimeStr] = None,
                  sha512_digest: Optional[HashStr] = None
@@ -404,16 +406,6 @@ class VideoFile(DbData):
             raise NotFoundException(f'no entry for file to Youtube ID {yt_id}')
         return cls._from_table_row(row)
 
-    @classmethod
-    def get_by_b64(cls, conn: DbConn, rel_path_b64: B64Str) -> Self:
-        """Retrieve by .rel_path provided as urlsafe_b64 encoding."""
-        return cls.get_one(conn, urlsafe_b64decode(rel_path_b64).decode())
-
-    @property
-    def rel_path_b64(self) -> B64Str:
-        """Return .rel_path as urlsafe_b64 e3ncoding."""
-        return B64Str(urlsafe_b64encode(str(self.rel_path).encode()).decode())
-
     @property
     def full_path(self) -> Path:
         """Return self.rel_path suffixed under PATH_DOWNLOADS."""