--- /dev/null
+ALTER TABLE files ADD COLUMN sha512_blob BLOB;
--- /dev/null
+CREATE TABLE files_new (
+ digest BLOB PRIMARY KEY,
+ rel_path TEXT NOT NULL,
+ flags INTEGER NOT NULL DEFAULT 0,
+ yt_id TEXT,
+ last_update TEXT NOT NULL,
+ FOREIGN KEY (yt_id) REFERENCES yt_videos(id)
+);
+INSERT INTO files_new SELECT
+ sha512_blob,
+ rel_path,
+ flags,
+ yt_id,
+ last_update
+FROM files;
+DROP TABLE files;
+ALTER TABLE files_new RENAME TO files;
cost INT NOT NULL
);
CREATE TABLE files (
- sha512_digest TEXT PRIMARY KEY,
+ digest BLOB PRIMARY KEY,
rel_path TEXT NOT NULL,
flags INTEGER NOT NULL DEFAULT 0,
yt_id TEXT,
<tr><th>YouTube ID:</th><td><a href="/{{page_names.yt_result}}/{{file.yt_id}}">{{file.yt_id}}</a></tr>
<tr><th>present:</th><td>{% if file.present %}<a href="/{{page_names.download}}/{{file.yt_id}}">yes</a>{% else %}no{% endif %}</td></tr>
</table>
-<form action="/{{page_names.file}}/{{file.sha512_digest}}" method="POST" />
+<form action="/{{page_names.file}}/{{file.digest.b64}}" method="POST" />
{% for flag_name in flag_names %}
{{ flag_name }}: <input type="checkbox" name="{{flag_name}}" {% if file.is_flag_set(flag_name) %}checked {% endif %} /><br />
{% endfor %}
{% for file in files %}
<tr>
<td>{{ file.size | round(3) }}</td>
-<td><input type="submit" name="play_{{file.sha512_digest}}" value="play" {% if not file.present %}disabled {% endif %}/></td>
-<td><a href="/{{page_names.file}}/{{file.sha512_digest}}">{{file.rel_path}}</a></td>
+<td><input type="submit" name="play_{{file.digest.b64}}" value="play" {% if not file.present %}disabled {% endif %}/></td>
+<td><a href="/{{page_names.file}}/{{file.digest.b64}}">{{file.rel_path}}</a></td>
</tr>
{% endfor %}
</table>
<input type="submit" name="up_{{idx}}" value="{% if reverse %}v{% else %}^{% endif %}" />
<input type="submit" name="down_{{idx}}" value="{% if reverse %}^{% else %}v{% endif %}" />
</td>
-<td><a href="/{{page_names.file}}/{{file.sha512_digest}}">{{ file.rel_path }}</a></td>
+<td><a href="/{{page_names.file}}/{{file.digest.b64}}">{{ file.rel_path }}</a></td>
</tr>
{% endfor %}
</table>
<table>
<tr><td id="status" colspan=2>
{% if running %}{% if pause %}PAUSED{% else %}PLAYING{% endif %}{% else %}STOPPED{% endif %}:<br />
-<a href="/{{page_names.file}}/{{current_video.sha512_digest}}">{{ current_video.rel_path }}</a><br />
+<a href="/{{page_names.file}}/{{current_video.digest.b64}}">{{ current_video.rel_path }}</a><br />
<form action="/{{page_names.playlist}}" method="POST">
<input type="submit" name="pause" autofocus value="{% if paused %}resume{% else %}pause{% endif %}">
<input type="submit" name="prev" value="prev">
"""Database access and management code."""
+from base64 import urlsafe_b64decode, urlsafe_b64encode
+from hashlib import file_digest
from pathlib import Path
from sqlite3 import (
connect as sql_connect, Connection as SqlConnection, Cursor, Row)
SqlText = NewType('SqlText', str)
-EXPECTED_DB_VERSION = 3
+EXPECTED_DB_VERSION = 5
PATH_DB = PATH_APP_DATA.joinpath('db.sql')
SQL_DB_VERSION = SqlText('PRAGMA user_version')
PATH_MIGRATIONS = PATH_APP_DATA.joinpath('migrations')
+_HASH_ALGO = 'sha512'
_PATH_DB_SCHEMA = PATH_MIGRATIONS.joinpath('new_init.sql')
_NAME_INSTALLER = Path('install.sh')
return list(conn.execute(SQL_DB_VERSION))[0][0]
+class Hash:
+ """Represents _HASH_ALGO hash of file."""
+
+ def __init__(self, as_bytes: bytes) -> None:
+ self.bytes = as_bytes
+
+ @classmethod
+ def from_file(cls, path: Path) -> Self:
+ """Hash-digest file at path, instantiate with hash's bytes."""
+ with path.open('rb') as f:
+ return cls(file_digest(f, _HASH_ALGO).digest())
+
+ @classmethod
+ def from_b64(cls, b64_str: str) -> Self:
+ """Instantiate from base64 string encoding of hash value."""
+ return cls(bytes(urlsafe_b64decode(b64_str)))
+
+ @property
+ def b64(self) -> str:
+ """Return hash bytes as base64-encoded string."""
+ return urlsafe_b64encode(self.bytes).decode('utf8')
+
+
class BaseDbConn:
"""Wrapper for pre-established sqlite3.Connection."""
"""Wrapper around sqlite3.Connection.execute."""
return self._conn.execute(sql, inputs)
+ def exec_on_values(self, sql: SqlText, inputs: tuple[Any, ...]) -> Cursor:
+ """Wraps .exec on inputs, affixes to sql proper ' VALUES (?, …)'."""
+ q_marks = '(' + ','.join(['?'] * len(inputs)) + ')'
+ return self._conn.execute(f'{sql} VALUES {q_marks}', inputs)
+
def commit(self) -> None:
"""Commit changes (i.e. DbData.save() calls) to database."""
self._conn.commit()
return cls(**kwargs)
@classmethod
- def get_one(cls, conn: BaseDbConn, id_: str) -> Self:
+ def get_one(cls, conn: BaseDbConn, id_: str | Hash) -> Self:
"""Return single entry of id_ from DB."""
sql = SqlText(f'SELECT * FROM {cls._table_name} '
f'WHERE {cls.id_name} = ?')
- row = conn.exec(sql, (id_,)).fetchone()
+ id__ = id_.bytes if isinstance(id_, Hash) else id_
+ row = conn.exec(sql, (id__,)).fetchone()
if not row:
msg = f'no entry found for ID "{id_}" in table {cls._table_name}'
raise NotFoundException(msg)
def save(self, conn: BaseDbConn) -> Cursor:
"""Save entry to DB."""
- vals = [getattr(self, col_name) for col_name in self._cols]
- q_marks = '(' + ','.join(['?'] * len(vals)) + ')'
- sql = SqlText(f'REPLACE INTO {self._table_name} VALUES {q_marks}')
- return conn.exec(sql, tuple(str(v) if isinstance(v, Path) else v
- for v in vals))
+ vals = []
+ for val in [getattr(self, col_name) for col_name in self._cols]:
+ if isinstance(val, Path):
+ val = str(val)
+ elif isinstance(val, Hash):
+ val = val.bytes
+ vals += [val]
+ return conn.exec_on_values(SqlText(f'REPLACE INTO {self._table_name}'),
+ tuple(vals))
from urllib.error import HTTPError
from jinja2 import ( # type: ignore
Environment as JinjaEnv, FileSystemLoader as JinjaFSLoader)
-from ytplom.db import DbConn
+from ytplom.db import Hash, DbConn
from ytplom.misc import (
- HashStr, FilesWithIndex, FlagName, PlayerUpdateId, QueryId, QueryText,
+ FilesWithIndex, FlagName, PlayerUpdateId, QueryId, QueryText,
QuotaCost, UrlStr, YoutubeId,
FILE_FLAGS, PATH_THUMBNAILS, YOUTUBE_URL_PREFIX,
ensure_expected_dirs,
if PAGE_NAMES['files'] == page_name:
self._receive_files_command(list(postvars.keys())[0])
elif PAGE_NAMES['file'] == page_name:
- self._receive_video_flag(HashStr(toks_url[2]),
+ self._receive_video_flag(Hash.from_b64(toks_url[2]),
[FlagName(k) for k in postvars])
elif PAGE_NAMES['yt_queries'] == page_name:
self._receive_yt_query(QueryText(postvars['query'][0]))
def _receive_files_command(self, command: str) -> None:
if command.startswith('play_'):
with DbConn() as conn:
- file = VideoFile.get_one(conn,
- HashStr(command.split('_', 1)[1]))
+ file = VideoFile.get_one(
+ conn, Hash.from_b64(command.split('_', 1)[1]))
self.server.player.inject_and_play(file)
self._redirect(Path('/'))
def _receive_video_flag(self,
- sha512_digest: HashStr,
+ digest: Hash,
flag_names: list[FlagName]
) -> None:
with DbConn() as conn:
- file = VideoFile.get_one(conn, sha512_digest)
+ file = VideoFile.get_one(conn, digest)
file.set_flags([FILE_FLAGS[name] for name in flag_names])
file.save(conn)
conn.commit()
file.ensure_absence_if_deleted()
self._redirect(Path('/')
.joinpath(PAGE_NAMES['file'])
- .joinpath(sha512_digest))
+ .joinpath(digest.b64))
def _receive_yt_query(self, query_txt: QueryText) -> None:
with DbConn() as conn:
show_absent = params.get('show_absent', [False])[0]
self._send_files_index(filter_, bool(show_absent))
elif PAGE_NAMES['file'] == page_name:
- self._send_file_data(HashStr(toks_url[2]))
+ self._send_file_data(Hash.from_b64(toks_url[2]))
elif PAGE_NAMES['yt_result'] == page_name:
self._send_yt_result(YoutubeId(toks_url[2]))
elif PAGE_NAMES['missing'] == page_name:
'youtube_prefix': YOUTUBE_URL_PREFIX,
'queries': linked_queries})
- def _send_file_data(self, sha512_digest: HashStr) -> None:
+ def _send_file_data(self, digest: Hash) -> None:
with DbConn() as conn:
- file = VideoFile.get_one(conn, sha512_digest)
+ file = VideoFile.get_one(conn, digest)
self._send_rendered_template(
_NAME_TEMPLATE_FILE_DATA,
{'file': file, 'flag_names': list(FILE_FLAGS)})
after_sql_steps: Optional[Callable] = None
) -> None:
self.version = version
+ self._filename_sql = filename_sql
self._sql_code = None
if filename_sql:
path_sql = PATH_MIGRATIONS.joinpath(filename_sql)
"""Apply to DB at path_db migration code stored in self."""
with sql_connect(path_db, autocommit=False) as conn:
if self._sql_code:
+ print(f'Executing {self._filename_sql}')
conn.executescript(self._sql_code)
if self._after_sql_steps:
+ print('Running additional steps')
self._after_sql_steps(conn)
conn.execute(SqlText(f'{SQL_DB_VERSION} = {self.version}'))
print('Finished migrations.')
+def _rewrite_files_last_field_processing_first_field(conn: BaseDbConn,
+ cb: Callable
+ ) -> None:
+ rows = conn.exec(SqlText('SELECT * FROM files')).fetchall()
+ for row in [list(r) for r in rows]:
+ row[-1] = cb(row[0])
+ conn.exec_on_values(SqlText('REPLACE INTO files'), tuple(row))
+
+
def _mig_2_calc_digests(sql_conn: SqlConnection) -> None:
"""Calculate sha512 digests to all known video files."""
+ # pylint: disable=import-outside-toplevel
from hashlib import file_digest
- from ytplom.misc import HashStr, VideoFile
+ from ytplom.misc import PATH_DOWNLOADS
conn = BaseDbConn(sql_conn)
- file_entries = VideoFile.get_all(conn)
- missing = [f for f in file_entries if not f.present]
+ rel_paths = [
+ p[0] for p
+ in conn.exec(SqlText('SELECT rel_path FROM files')).fetchall()]
+ missing = [p for p in rel_paths
+ if not Path(PATH_DOWNLOADS).joinpath(p).exists()]
if missing:
- print('WARNING: Cannot find files to following paths')
- for f in missing:
- print(f.full_path)
+ print('WARNING: Cannot find files to following (relative) paths:')
+ for path in missing:
+ print(path)
reply = input(
'WARNING: To continue migration, will have to delete above '
f'rows from DB. To continue, type (exactly) "{_LEGIT_YES}": ')
if _LEGIT_YES != reply:
raise HandledException('Migration aborted!')
- for f in missing:
- conn.exec(SqlText('DELETE FROM files WHERE rel_path = ?'),
- (str(f.rel_path),))
- for video_file in VideoFile.get_all(conn):
- print(f'Calculating digest for: {video_file.rel_path}')
- with open(video_file.full_path, 'rb') as vf:
- video_file.sha512_digest = HashStr(
- file_digest(vf, 'sha512').hexdigest())
- video_file.save(conn)
+ for path in missing:
+ conn.exec(SqlText('DELETE FROM files WHERE rel_path = ?'), (path,))
+
+ def hexdigest_file(path):
+ print(f'Calculating digest for: {path}')
+ with Path(PATH_DOWNLOADS).joinpath(path).open('rb') as vf:
+ return file_digest(vf, 'sha512').hexdigest()
+
+ _rewrite_files_last_field_processing_first_field(conn, hexdigest_file)
+
+
+def _mig_4_convert_digests(sql_conn: SqlConnection) -> None:
+ """Fill new files.sha512_blob field with binary .sha512_digest."""
+ _rewrite_files_last_field_processing_first_field(
+ BaseDbConn(sql_conn), bytes.fromhex)
MIGRATIONS = [
_Migration(0, Path('0_init.sql')),
_Migration(1, Path('1_add_files_last_updated.sql')),
_Migration(2, Path('2_add_files_sha512.sql'), _mig_2_calc_digests),
- _Migration(3, Path('3_files_redo.sql'))
+ _Migration(3, Path('3_files_redo.sql')),
+ _Migration(4, Path('4_add_files_sha512_blob.sql'), _mig_4_convert_digests),
+ _Migration(5, Path('5_files_redo.sql'))
]
# included libs
from typing import NewType, Optional, Self, TypeAlias
from os import chdir, environ
-from hashlib import file_digest
from random import shuffle
from time import time, sleep
from datetime import datetime, timedelta
from mpv import MPV # type: ignore
from yt_dlp import YoutubeDL # type: ignore
# ourselves
-from ytplom.db import BaseDbConn, DbConn, DbData, SqlText
+from ytplom.db import BaseDbConn, DbConn, DbData, Hash, SqlText
from ytplom.primitives import HandledException, NotFoundException
ProseText = NewType('ProseText', str)
FlagName = NewType('FlagName', str)
FlagsInt = NewType('FlagsInt', int)
-HashStr = NewType('HashStr', str)
AmountDownloads = NewType('AmountDownloads', int)
PlayerUpdateId = NewType('PlayerUpdateId', str)
UrlStr = NewType('UrlStr', str)
def save_to_query(self, conn: BaseDbConn, query_id: QueryId) -> None:
"""Save inclusion of self in results to query of query_id."""
- conn.exec(SqlText('REPLACE INTO yt_query_results VALUES (?, ?)'),
- (query_id, self.id_))
+ conn.exec_on_values(SqlText('REPLACE INTO yt_query_results'),
+ (query_id, self.id_))
class VideoFile(DbData):
"""Collects data about downloaded files."""
- id_name = 'sha512_digest'
+ id_name = 'digest'
_table_name = 'files'
- _cols = ('sha512_digest', 'rel_path', 'flags', 'yt_id', 'last_update')
+ _cols = ('digest', 'rel_path', 'flags', 'yt_id', 'last_update')
last_update: DatetimeStr
rel_path: Path
+ digest: Hash
def __init__(self,
+ digest: Optional[Hash],
rel_path: Path,
- yt_id: Optional[YoutubeId] = None,
flags: FlagsInt = FlagsInt(0),
- last_update: Optional[DatetimeStr] = None,
- sha512_digest: Optional[HashStr] = None
+ yt_id: Optional[YoutubeId] = None,
+ last_update: Optional[DatetimeStr] = None
) -> None:
self.rel_path = rel_path
+ self.digest = digest if digest else Hash.from_file(self.full_path)
self.yt_id = yt_id
self.flags = flags
if last_update is None:
self._renew_last_update()
else:
self.last_update = last_update
- if sha512_digest is None:
- with self.full_path.open('rb') as f:
- self.sha512_digest = HashStr(
- file_digest(f, 'sha512').hexdigest())
- else:
- self.sha512_digest = sha512_digest
def _renew_last_update(self):
self.last_update = DatetimeStr(datetime.now().strftime(TIMESTAMP_FMT))
if p.is_file() and p not in known_paths]:
yt_id = self._id_from_filename(path)
print(f'SYNC: new file {path}, saving to YT ID "{yt_id}".')
- file = VideoFile(path, yt_id)
+ file = VideoFile(digest=None, rel_path=path, yt_id=yt_id)
file.save(conn)
self._files = VideoFile.get_all(conn)
for file in self._files: