+#!/usr/bin/env python3
+from os import environ, makedirs, scandir, remove as os_remove
+from os.path import (isdir, exists as path_exists, join as path_join, splitext,
+ basename)
+from time import sleep
+from json import load as json_load, dump as json_dump
+from datetime import datetime, timedelta
+from threading import Thread
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from urllib.parse import urlparse, parse_qs
+from urllib.request import urlretrieve
+from hashlib import md5
+
+from jinja2 import Template
+from yt_dlp import YoutubeDL
+import googleapiclient.discovery
+
# YouTube Data API v3 key, read from the environment (None if unset).
API_KEY = environ.get('GOOGLE_API_KEY')

# Port the local web UI listens on.
HTTP_PORT = 8083
# JSON file mapping request timestamps to API quota units spent.
PATH_QUOTA_LOG = 'quota_log.json'
# Directory for finished video downloads.
PATH_DIR_DOWNLOADS = 'downloads'
# Directory for thumbnail images fetched during searches.
PATH_DIR_THUMBNAILS = 'thumbnails'
# Directory caching Google API search responses as JSON files (md5 of query).
PATH_DIR_REQUESTS_CACHE = 'cache_googleapi'
# Directory holding the jinja2 page templates.
PATH_DIR_TEMPLATES = 'templates'
NAME_DIR_TEMP = 'temp'
NAME_TEMPLATE_INDEX = 'index.tmpl'
NAME_TEMPLATE_RESULTS = 'results.tmpl'

# Temp dir sits inside the downloads dir; yt-dlp writes partial files here.
PATH_DIR_TEMP = path_join(PATH_DIR_DOWNLOADS, NAME_DIR_TEMP)
EXPECTED_DIRS = [PATH_DIR_DOWNLOADS, PATH_DIR_TEMP, PATH_DIR_THUMBNAILS,
                 PATH_DIR_REQUESTS_CACHE]
PATH_TEMPLATE_INDEX = path_join(PATH_DIR_TEMPLATES, NAME_TEMPLATE_INDEX)
# Timestamp format used as keys in the quota log.
TIMESTAMP_FMT = '%Y-%m-%d %H:%M:%S.%f'
YOUTUBE_URL_PREFIX = 'https://www.youtube.com/watch?v='

# Per-call quota costs of the Google API endpoints used below.
QUOTA_COST_YOUTUBE_SEARCH = 100
QUOTA_COST_YOUTUBE_DETAILS = 1

# Queue of video IDs awaiting download; consumed by download_thread.
to_download = []
+
+
def ensure_expected_dirs_and_files():
    """Create all expected directories and the quota log file if missing.

    Raises an Exception if an expected directory path exists but is not a
    directory, and propagates any error from parsing an existing quota log.
    """
    for dir_path in EXPECTED_DIRS:
        if isdir(dir_path):
            continue
        if path_exists(dir_path):
            raise Exception(
                f'at expected directory path {dir_path} found non-directory')
        print(f'creating expected directory: {dir_path}')
        makedirs(dir_path)
    if path_exists(PATH_QUOTA_LOG):
        try:
            # Validate that the existing log file is parseable.
            read_quota_log()
        except Exception as e:
            print(f'Trouble reading quota log file at {PATH_QUOTA_LOG}:')
            raise e
    else:
        # Seed the log with an empty JSON object.
        with open(PATH_QUOTA_LOG, 'w', encoding='utf8') as f:
            f.write('{}')
+
+
def clean_unfinished_downloads():
    """Delete leftover partial downloads from the temp directory.

    yt-dlp writes in-progress files into PATH_DIR_TEMP and only moves them
    into PATH_DIR_DOWNLOADS on completion, so anything still present here
    at startup is an aborted download.
    """
    for entry in scandir(PATH_DIR_TEMP):
        # BUG FIX: .is_file must be *called* — the bare bound method is
        # always truthy, so the old filter kept every entry (including
        # directories, which os.remove would choke on).
        if entry.is_file():
            print(f'removing unfinished download: {entry.path}')
            os_remove(entry.path)
+
+
def run_server():
    """Serve the web UI on localhost until interrupted from the keyboard."""
    httpd = HTTPServer(('localhost', HTTP_PORT), TaskHandler)
    print(f'running at port {HTTP_PORT}')
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # The download thread is non-daemonic, so a second Ctrl-C is needed.
        print('aborted due to keyboard interrupt; '
              'repeat to end download thread too')
    httpd.server_close()
+
+
def read_quota_log():
    """Load the quota log, keeping only entries from the last 24 hours.

    Returns a dict mapping TIMESTAMP_FMT strings to quota-unit amounts.
    """
    with open(PATH_QUOTA_LOG, 'r', encoding='utf8') as f:
        raw_log = json_load(f)
    cutoff = datetime.now() - timedelta(days=1)
    return {stamp: amount for stamp, amount in raw_log.items()
            if datetime.strptime(stamp, TIMESTAMP_FMT) >= cutoff}
+
+
def update_quota_log(now, cost):
    """Record *cost* quota units spent at timestamp *now* and persist.

    now: timestamp string in TIMESTAMP_FMT used as log key.
    cost: quota units consumed by the API call being logged.
    """
    log = read_quota_log()
    log[now] = cost + log.get(now, 0)
    with open(PATH_QUOTA_LOG, 'w', encoding='utf8') as f:
        json_dump(log, f)
+
+
def download_thread():
    """Worker loop: pop queued video IDs off to_download and fetch them.

    Runs forever; this function is the target of the background Thread.
    """
    # Invariant yt-dlp config: finished files land in the downloads dir,
    # partial files in its temp subdirectory.
    ydl_params = {'paths': {'home': PATH_DIR_DOWNLOADS,
                            'temp': NAME_DIR_TEMP}}
    while True:
        sleep(0.5)  # poll the queue twice per second
        if not to_download:
            continue
        video_id = to_download.pop(0)
        with YoutubeDL(ydl_params) as ydl:
            ydl.download([f'{YOUTUBE_URL_PREFIX}{video_id}'])
+
+
class TaskHandler(BaseHTTPRequestHandler):
    """HTTP handler serving the search / download / browse web UI.

    POST /                  run a YouTube search, cache it, redirect to it
    GET  /                  index page listing all cached queries
    GET  /query/<id>        results page for one cached query
    GET  /thumbnails/<file> stored thumbnail image
    GET  /dl/<video_id>     serve the file if downloaded, else queue it
    """

    def _send_http(self, content=None, headers=None, code=200):
        """Send a response: status *code*, (name, value) *headers*, raw
        *content* bytes (omitted when None)."""
        self.send_response(code)
        for name, value in (headers or []):
            self.send_header(name, value)
        self.end_headers()
        if content is not None:
            self.wfile.write(content)

    @staticmethod
    def _format_duration(iso_duration):
        """Convert an ISO-8601 duration (e.g. 'PT1H2M3S') to 'HH:MM:SS'.

        Years and months use approximate lengths (365.25 / 30 days), as
        before.  Handles date-only durations such as 'P1D', which used to
        crash the old inline parser on a failed 2-tuple unpack.
        """
        date_part, _, time_part = iso_duration.partition('T')
        date_part = date_part[1:]  # strip the leading 'P'
        total = 0
        for unit, unit_seconds in (('Y', 60 * 60 * 24 * 365.25),
                                   ('M', 60 * 60 * 24 * 30),
                                   ('D', 60 * 60 * 24)):
            if unit in date_part:
                count, date_part = date_part.split(unit)
                total += int(count) * unit_seconds
        for unit, unit_seconds in (('H', 60 * 60), ('M', 60), ('S', 1)):
            if unit in time_part:
                count, time_part = time_part.split(unit)
                total += int(count) * unit_seconds
        # int(): the Y/M factors are floats and used to leak '.0' into the
        # rendered string.
        total = int(total)
        # BUG FIX: minutes was previously total // 60 (not modulo 60), so a
        # 1h01m01s video rendered as '01:61:01'.
        hours, minutes, seconds = total // 3600, (total // 60) % 60, total % 60
        return f'{hours:02d}:{minutes:02d}:{seconds:02d}'

    @staticmethod
    def _downloaded_files():
        """Map video ID -> file path for each finished download.

        yt-dlp output filenames embed the video ID in square brackets.
        """
        downloaded = {}
        for entry in scandir(PATH_DIR_DOWNLOADS):
            # BUG FIX: .is_file() must be called; the bare method object is
            # always truthy, which also swept in the temp *directory*.
            if not entry.is_file():
                continue
            stem, _ = splitext(entry.path)
            video_id = stem.split('[')[-1].split(']')[0]
            downloaded[video_id] = entry.path
        return downloaded

    def do_POST(self):
        """Run a YouTube search for the posted 'query', cache results plus
        thumbnails and per-video details, then redirect to the results page."""
        length = int(self.headers['content-length'])
        postvars = parse_qs(self.rfile.read(length).decode())
        query = postvars['query'][0]
        youtube = googleapiclient.discovery.build('youtube', 'v3',
                                                  developerKey=API_KEY)
        now = datetime.now().strftime(TIMESTAMP_FMT)

        update_quota_log(now, QUOTA_COST_YOUTUBE_SEARCH)
        search_request = youtube.search().list(part='snippet', maxResults=25,
                                               q=query, safeSearch='none',
                                               type='video')
        response = search_request.execute()
        to_save = {'text': query, 'retrieved_at': now, 'results': []}
        ids_for_details = []
        for item in response['items']:
            video_id = item['id']['videoId']
            ids_for_details.append(video_id)
            snippet = item['snippet']
            to_save['results'].append({
                'id': video_id,
                'title': snippet['title'],
                'description': snippet['description'],
                'published_at': snippet['publishedAt'],
            })
            thumbnail_url = snippet['thumbnails']['default']['url']
            store_at = path_join(PATH_DIR_THUMBNAILS, f'{video_id}.jpg')
            urlretrieve(thumbnail_url, store_at)

        update_quota_log(now, QUOTA_COST_YOUTUBE_DETAILS)
        # BUG FIX: the Data API part name is camelCase 'contentDetails';
        # the snake_case 'content_details' is rejected by the API.
        details_request = youtube.videos().list(id=','.join(ids_for_details),
                                                part='contentDetails')
        details = details_request.execute()
        for item, detailed in zip(to_save['results'], details['items']):
            assert item['id'] == detailed['id']
            item['duration'] = detailed['contentDetails']['duration']
            item['definition'] = detailed['contentDetails']['definition']

        md5sum = md5(query.encode()).hexdigest()
        path = path_join(PATH_DIR_REQUESTS_CACHE, f'{md5sum}.json')
        with open(path, 'w', encoding='utf8') as f:
            json_dump(to_save, f)
        self._send_http(headers=[('Location', f'/query/{md5sum}')], code=302)

    def do_GET(self):
        """Route GET requests: thumbnails, downloads, results page, index."""
        parsed_url = urlparse(self.path)
        toks_url = parsed_url.path.split('/')
        page = toks_url[1]

        if 'thumbnails' == page:
            filename = toks_url[2]
            with open(path_join(PATH_DIR_THUMBNAILS, filename), 'rb') as f:
                img = f.read()
            self._send_http(img, [('Content-type', 'image/jpg')])
            return

        downloaded = self._downloaded_files()

        if 'dl' == page:
            video_id = toks_url[2]
            if video_id in downloaded:
                with open(downloaded[video_id], 'rb') as f:
                    video = f.read()
                self._send_http(content=video)
                return
            # Not on disk yet: queue it and bounce back to where we came from.
            to_download.append(video_id)
            params = parse_qs(parsed_url.query)
            query_id = params.get('from_query', [''])[0]
            redir_path = f'/query/{query_id}' if query_id else '/'
            self._send_http(headers=[('Location', redir_path)], code=302)
            return

        kwargs = {'quota_count': sum(read_quota_log().values())}
        if 'query' == page:
            tmpl_name = NAME_TEMPLATE_RESULTS
            self._prep_results_page(kwargs, toks_url[2], downloaded)
        else:
            tmpl_name = NAME_TEMPLATE_INDEX
            self._prep_index_page(kwargs, downloaded)
        path = path_join(PATH_DIR_TEMPLATES, tmpl_name)
        with open(path, 'r', encoding='utf8') as f:
            tmpl = Template(f.read())
        self._send_http(bytes(tmpl.render(**kwargs), 'utf8'))

    def _prep_results_page(self, kwargs, query_id, downloaded):
        """Fill template kwargs for the results page of one cached query."""
        kwargs['youtube_prefix'] = YOUTUBE_URL_PREFIX
        kwargs['query_id'] = query_id
        path = path_join(PATH_DIR_REQUESTS_CACHE, f'{query_id}.json')
        with open(path, 'r', encoding='utf8') as f:
            query = json_load(f)
        for result in query['results']:
            result['available'] = result['id'] in downloaded
            result['duration'] = self._format_duration(result['duration'])
            result['definition'] = result['definition'].upper()
        kwargs['query'] = query

    def _prep_index_page(self, kwargs, downloaded):
        """Fill template kwargs for the index page listing cached queries."""
        queries = []
        for file in scandir(PATH_DIR_REQUESTS_CACHE):
            # BUG FIX: .is_file() called (bare method object is always truthy).
            if not file.is_file():
                continue
            id_, _ = splitext(basename(file.path))
            with open(file.path, 'r', encoding='utf8') as f:
                query = json_load(f)
            query['id'] = id_
            for result in query['results']:
                result['available'] = result['id'] in downloaded
            query['downloads'] = len([r for r in query['results']
                                      if r['available']])
            queries.append(query)
        queries.sort(key=lambda q: q['retrieved_at'], reverse=True)
        kwargs['queries'] = queries
+
+
if __name__ == '__main__':
    # Shared download queue (also defined at module top level; re-bound here).
    to_download = []
    ensure_expected_dirs_and_files()
    # Purge partial files left over from a previous, aborted run.
    clean_unfinished_downloads()
    # Non-daemonic on purpose: lets an in-flight download finish; a second
    # Ctrl-C is needed to stop it (see run_server's interrupt message).
    Thread(target=download_thread, daemon=False).start()
    run_server()