From 716ab0d4ff7ed7704684a2d337df46e6d9ac93b6 Mon Sep 17 00:00:00 2001 From: Christian Heller Date: Sun, 1 Dec 2024 08:42:28 +0100 Subject: [PATCH] Turn files.sha512_digest into new primary key, get rid of .rel_path base64-encoding shenanigans. --- src/migrate.py | 3 +-- src/migrations/2_add_files_sha512.py | 4 +-- src/migrations/3_files_redo.sql | 32 +++++++++++++++++++++++ src/migrations/{init_2.sql => init_3.sql} | 8 +++--- src/templates/file_data.tmpl | 2 +- src/templates/files.tmpl | 4 +-- src/templates/playlist.tmpl | 4 +-- src/ytplom/http.py | 20 +++++++------- src/ytplom/misc.py | 24 ++++++----------- 9 files changed, 62 insertions(+), 39 deletions(-) create mode 100644 src/migrations/3_files_redo.sql rename src/migrations/{init_2.sql => init_3.sql} (80%) diff --git a/src/migrate.py b/src/migrate.py index fc63965..e1ba4de 100755 --- a/src/migrate.py +++ b/src/migrate.py @@ -47,8 +47,7 @@ def main() -> None: msg_apply_prefix = f'Applying migration {version}: ' for path in [p for p in sorted_paths if _SUFFIX_SQL == p.suffix]: print(f'{msg_apply_prefix}{path}') - sql = SqlText(path.read_text(encoding='utf8')) - conn.exec(sql) + conn.exec_script(path) for path in [p for p in sorted_paths if _SUFFIX_PY == p.suffix]: spec = spec_from_file_location(str(path), path) assert spec is not None diff --git a/src/migrations/2_add_files_sha512.py b/src/migrations/2_add_files_sha512.py index 329286f..0e10011 100644 --- a/src/migrations/2_add_files_sha512.py +++ b/src/migrations/2_add_files_sha512.py @@ -22,7 +22,7 @@ def migrate(conn: DbConn) -> None: (str(f.rel_path),)) for file in VideoFile.get_all(conn): print(f'Calculating digest for: {file.rel_path}') - with open(file.full_path, 'rb') as x: + with open(file.full_path, 'rb') as f: file.sha512_digest = HashStr( - file_digest(x, 'sha512').hexdigest()) + file_digest(f, 'sha512').hexdigest()) file.save(conn) diff --git a/src/migrations/3_files_redo.sql b/src/migrations/3_files_redo.sql new file mode 100644 index 0000000..bf485bb --- /dev/null +++ b/src/migrations/3_files_redo.sql @@ -0,0 +1,32 @@ +CREATE TEMPORARY TABLE files_backup ( + rel_path TEXT PRIMARY KEY, + yt_id TEXT NOT NULL DEFAULT "", + flags INTEGER NOT NULL DEFAULT 0, + last_update TEXT NOT NULL DEFAULT "2000-01-01 12:00:00.123456", + sha512_digest TEXT NOT NULL, + FOREIGN KEY (yt_id) REFERENCES yt_videos(id) +); +INSERT INTO files_backup SELECT + rel_path, + yt_id, + flags, + last_update, + sha512_digest +FROM files; +DROP TABLE files; +CREATE TABLE files ( + sha512_digest TEXT PRIMARY KEY, + rel_path TEXT NOT NULL, + flags INTEGER NOT NULL DEFAULT 0, + yt_id TEXT, + last_update TEXT NOT NULL, + FOREIGN KEY (yt_id) REFERENCES yt_videos(id) +); +INSERT INTO files SELECT + sha512_digest, + rel_path, + flags, + yt_id, + last_update +FROM files_backup; +DROP TABLE files_backup; diff --git a/src/migrations/init_2.sql b/src/migrations/init_3.sql similarity index 80% rename from src/migrations/init_2.sql rename to src/migrations/init_3.sql index aaa866b..d223bef 100644 --- a/src/migrations/init_2.sql +++ b/src/migrations/init_3.sql @@ -24,10 +24,10 @@ CREATE TABLE quota_costs ( cost INT NOT NULL ); CREATE TABLE files ( - rel_path TEXT PRIMARY KEY, - yt_id TEXT NOT NULL DEFAULT "", + sha512_digest TEXT PRIMARY KEY, + rel_path TEXT NOT NULL, flags INTEGER NOT NULL DEFAULT 0, - last_update TEXT NOT NULL DEFAULT "2000-01-01 12:00:00.123456", - sha512_digest TEXT NOT NULL DEFAULT "", + yt_id TEXT, + last_update TEXT NOT NULL, FOREIGN KEY (yt_id) REFERENCES yt_videos(id) ); diff --git a/src/templates/file_data.tmpl b/src/templates/file_data.tmpl index d0e606d..589ef26 100644 --- a/src/templates/file_data.tmpl +++ b/src/templates/file_data.tmpl @@ -8,7 +8,7 @@ YouTube ID:{{file.yt_id}} present:{% if file.present %}yes{% else %}no{% endif %} -
+ {% for flag_name in flag_names %} {{ flag_name }}:
{% endfor %} diff --git a/src/templates/files.tmpl b/src/templates/files.tmpl index 2d58c5b..f2a8024 100644 --- a/src/templates/files.tmpl +++ b/src/templates/files.tmpl @@ -15,8 +15,8 @@ show absent: {{ file.size | round(3) }} - -{{file.rel_path}} + +{{file.rel_path}} {% endfor %} diff --git a/src/templates/playlist.tmpl b/src/templates/playlist.tmpl index bbf441e..494a7a1 100644 --- a/src/templates/playlist.tmpl +++ b/src/templates/playlist.tmpl @@ -48,7 +48,7 @@ td.entry_buttons { width: 5em; } -{{ file.rel_path }} +{{ file.rel_path }} {% endfor %} @@ -61,7 +61,7 @@ td.entry_buttons { width: 5em; }
{% if running %}{% if pause %}PAUSED{% else %}PLAYING{% endif %}{% else %}STOPPED{% endif %}:
-{{ current_video.rel_path }}
+{{ current_video.rel_path }}
diff --git a/src/ytplom/http.py b/src/ytplom/http.py index 24c40b3..29f06ee 100644 --- a/src/ytplom/http.py +++ b/src/ytplom/http.py @@ -10,7 +10,7 @@ from urllib.error import HTTPError from jinja2 import ( # type: ignore Environment as JinjaEnv, FileSystemLoader as JinjaFSLoader) from ytplom.misc import ( - B64Str, FilesWithIndex, FlagName, NotFoundException, PlayerUpdateId, + HashStr, FilesWithIndex, FlagName, NotFoundException, PlayerUpdateId, QueryId, QueryText, QuotaCost, UrlStr, YoutubeId, FILE_FLAGS, PATH_APP_DATA, PATH_THUMBNAILS, YOUTUBE_URL_PREFIX, ensure_expected_dirs, @@ -104,7 +104,7 @@ class _TaskHandler(BaseHTTPRequestHandler): if PAGE_NAMES['files'] == page_name: self._receive_files_command(list(postvars.keys())[0]) elif PAGE_NAMES['file'] == page_name: - self._receive_video_flag(B64Str(toks_url[2]), + self._receive_video_flag(HashStr(toks_url[2]), [FlagName(k) for k in postvars]) elif PAGE_NAMES['yt_queries'] == page_name: self._receive_yt_query(QueryText(postvars['query'][0])) @@ -132,24 +132,24 @@ class _TaskHandler(BaseHTTPRequestHandler): def _receive_files_command(self, command: str) -> None: if command.startswith('play_'): with DbConn() as conn: - file = VideoFile.get_by_b64(conn, - B64Str(command.split('_', 1)[1])) + file = VideoFile.get_one(conn, + HashStr(command.split('_', 1)[1])) self.server.player.inject_and_play(file) self._redirect(Path('/')) def _receive_video_flag(self, - rel_path_b64: B64Str, + sha512_digest: HashStr, flag_names: list[FlagName] ) -> None: with DbConn() as conn: - file = VideoFile.get_by_b64(conn, rel_path_b64) + file = VideoFile.get_one(conn, sha512_digest) file.set_flags([FILE_FLAGS[name] for name in flag_names]) file.save(conn) conn.commit() file.ensure_absence_if_deleted() self._redirect(Path('/') .joinpath(PAGE_NAMES['file']) - .joinpath(rel_path_b64)) + .joinpath(sha512_digest)) def _receive_yt_query(self, query_txt: QueryText) -> None: with DbConn() as conn: @@ -176,7 +176,7 @@ class _TaskHandler(BaseHTTPRequestHandler): show_absent = params.get('show_absent', [False])[0] self._send_files_index(filter_, bool(show_absent)) elif PAGE_NAMES['file'] == page_name: - self._send_file_data(B64Str(toks_url[2])) + self._send_file_data(HashStr(toks_url[2])) elif PAGE_NAMES['yt_result'] == page_name: self._send_yt_result(YoutubeId(toks_url[2])) elif PAGE_NAMES['missing'] == page_name: @@ -269,9 +269,9 @@ class _TaskHandler(BaseHTTPRequestHandler): 'youtube_prefix': YOUTUBE_URL_PREFIX, 'queries': linked_queries}) - def _send_file_data(self, rel_path_b64: B64Str) -> None: + def _send_file_data(self, sha512_digest: HashStr) -> None: with DbConn() as conn: - file = VideoFile.get_by_b64(conn, rel_path_b64) + file = VideoFile.get_one(conn, sha512_digest) self._send_rendered_template( _NAME_TEMPLATE_FILE_DATA, {'file': file, 'flag_names': list(FILE_FLAGS)}) diff --git a/src/ytplom/misc.py b/src/ytplom/misc.py index 512c755..805f22f 100644 --- a/src/ytplom/misc.py +++ b/src/ytplom/misc.py @@ -3,7 +3,6 @@ # included libs from typing import Any, Literal, NewType, Optional, Self, TypeAlias from os import chdir, environ -from base64 import urlsafe_b64encode, urlsafe_b64decode from hashlib import file_digest from random import shuffle from time import time, sleep @@ -40,7 +39,6 @@ FlagsInt = NewType('FlagsInt', int) HashStr = NewType('HashStr', str) AmountDownloads = NewType('AmountDownloads', int) PlayerUpdateId = NewType('PlayerUpdateId', str) -B64Str = NewType('B64Str', str) UrlStr = NewType('UrlStr', str) FilesWithIndex: TypeAlias = list[tuple[int, 'VideoFile']] @@ -68,7 +66,7 @@ QUOTA_COST_YOUTUBE_SEARCH = QuotaCost(100) QUOTA_COST_YOUTUBE_DETAILS = QuotaCost(1) # database stuff -EXPECTED_DB_VERSION = 2 +EXPECTED_DB_VERSION = 3 SQL_DB_VERSION = SqlText('PRAGMA user_version') PATH_MIGRATIONS = PATH_APP_DATA.joinpath('migrations') PATH_DB_SCHEMA = PATH_MIGRATIONS.joinpath(f'init_{EXPECTED_DB_VERSION}.sql') @@ -167,6 +165,10 @@ class DbConn: """Wrapper around sqlite3.Connection.execute.""" return self._conn.execute(sql, inputs) + def exec_script(self, path: Path) -> None: + """Simplified sqlite3.Connection.executescript.""" + self._conn.executescript(path.read_text(encoding='utf8')) + def commit(self) -> None: """Commit changes (i.e. DbData.save() calls) to database.""" self._conn.commit() @@ -365,15 +367,15 @@ class YoutubeVideo(DbData): class VideoFile(DbData): """Collects data about downloaded files.""" - id_name = 'rel_path' + id_name = 'sha512_digest' _table_name = 'files' - _cols = ('rel_path', 'yt_id', 'flags', 'last_update', 'sha512_digest') + _cols = ('sha512_digest', 'rel_path', 'flags', 'yt_id', 'last_update') last_update: DatetimeStr rel_path: Path def __init__(self, rel_path: Path, - yt_id: YoutubeId, + yt_id: Optional[YoutubeId] = None, flags: FlagsInt = FlagsInt(0), last_update: Optional[DatetimeStr] = None, sha512_digest: Optional[HashStr] = None @@ -404,16 +406,6 @@ class VideoFile(DbData): raise NotFoundException(f'no entry for file to Youtube ID {yt_id}') return cls._from_table_row(row) - @classmethod - def get_by_b64(cls, conn: DbConn, rel_path_b64: B64Str) -> Self: - """Retrieve by .rel_path provided as urlsafe_b64 encoding.""" - return cls.get_one(conn, urlsafe_b64decode(rel_path_b64).decode()) - - @property - def rel_path_b64(self) -> B64Str: - """Return .rel_path as urlsafe_b64 e3ncoding.""" - return B64Str(urlsafe_b64encode(str(self.rel_path).encode()).decode()) - @property def full_path(self) -> Path: """Return self.rel_path suffixed under PATH_DOWNLOADS.""" -- 2.30.2