home · contact · privacy
Commit of work done so far.
[url-catcher] / url_catcher.py
1 import bottle
2 import validators
3 import html
4 import os
5 import os.path
6 import time
7 import json
8 import smtplib
9 import email.mime.text
10 import tempfile
11 import shutil
12
# Seconds after which an IP's slowdown record expires and its attempt counter
# starts over (default: one day).
slowdown_reset = 60 * 60 * 24
# Working directories and files, all relative to the current directory.
ips_dir = 'ips'
lists_dir = 'lists'
captchas_dir = 'captchas'
customizations_path = 'customizations.json'
# User-facing texts; any of them may be overridden via the customizations
# file's 'translations' object.
messages = {
    'internalServerError': 'Internal server error.',
    'badPageName': 'Bad page name.',
    'wrongCaptcha': 'Wrong captcha.',
    'invalidURL': 'Invalid URL.',
    'recordedURL': 'Recorded URL: ',
    'pleaseWait': 'Too many attempts from your IP. Wait this many seconds: ',
    'mailSubject': '[url_catcher.py] New URL submitted',
    'mailBodyPage': 'New URL submitted for page: ',
    'mailBodyURL': 'URL is: ',
}
# Notification mail addresses; overridable via the 'mailConfig' object.
mail_config = {
    'from': 'foo@example.org',
    'to': 'bar@example.org',
}
if os.path.isfile(customizations_path):
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(customizations_path, encoding='utf-8') as customizations_file:
        customizations = json.load(customizations_file)
    # Every section is optional, so a file that only sets e.g. 'slowdownReset'
    # no longer aborts the import with a KeyError.
    messages.update(customizations.get('translations', {}))
    mail_config.update(customizations.get('mailConfig', {}))
    slowdown_reset = customizations.get('slowdownReset', slowdown_reset)
os.makedirs(ips_dir, exist_ok=True)
os.makedirs(lists_dir, exist_ok=True)
45
46
def atomic_write(path, content, mode):
    """Atomically write or append content to the file at path.

    The data is written to a temporary file first, which is then renamed
    over path, so path holds either its old or its new content, never a
    partially written state.

    Args:
        path: Destination file path.
        content: Data to write (text for text modes).
        mode: open() mode used for writing. With 'a' the current content of
            path (if the file exists) is kept and content is added after it;
            with any other mode, such as 'w', path is replaced.

    Raises:
        OSError: If the temporary file cannot be created, written or moved
            into place. The temporary file is removed in that case.
    """
    # Create the temporary file next to the destination: a rename can only
    # be atomic (or succeed at all) within a single filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path) or '.')
    # mkstemp() returns an already open descriptor; close it so it does not
    # leak. The file is reopened by name below.
    os.close(fd)
    try:
        # Appending to a file that does not exist yet is a plain write.
        if 'a' == mode and os.path.exists(path):
            shutil.copy2(path, tmp_path)
        with open(tmp_path, mode) as f:
            f.write(content)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, path)
    except Exception:
        # Do not leave stray temporary files behind on failure.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
58
59
def send_mail(page, url):
    """Send mail telling about page URL list update.

    Args:
        page: Name of the page the URL was submitted for.
        url: The submitted URL.

    Raises:
        smtplib.SMTPException, OSError: If the SMTP server on localhost
            cannot be reached or refuses the message.
    """
    body = '{}{}\n{}{}'.format(messages['mailBodyPage'], page,
                               messages['mailBodyURL'], url)
    msg = email.mime.text.MIMEText(body)
    msg['Subject'] = messages['mailSubject']
    msg['From'] = mail_config['from']
    msg['To'] = mail_config['to']
    # The context manager issues QUIT and closes the connection even when
    # send_message() raises, so a failed delivery does not leak the socket.
    with smtplib.SMTP('localhost') as s:
        s.send_message(msg)
71
72
@bottle.error(500)
def internal_error(error):
    """Answer HTTP 500 errors with the generic configured message only.

    The error argument is deliberately ignored, so no detail about the
    failure is put into the response body.
    """
    generic_message = messages['internalServerError']
    return generic_message
77
78
@bottle.post('/uwsgi/post_link')
def post_link():
    """Record URL if all sane, send mail to curator.

    Reads the form fields 'page', 'captcha' and 'url' of the current request.

    Requests are throttled per client IP. A file named after the IP in
    ips_dir holds two lines: the start time of the current slowdown window
    and the number of attempts counted in it. A request is refused until
    2**attempts seconds have passed since the window start; the window is
    forgotten after slowdown_reset seconds.

    Returns:
        The confirmation text with the HTML-escaped URL on success, or a
        bottle.HTTPResponse with status 429 (throttled, with a Retry-After
        header) or 400 (bad page name, wrong captcha, invalid URL).
    """

    # Slow down repeat requests.
    now = int(time.time())
    start_date = now
    attempts = 0
    rewrite = True
    ip = bottle.request.environ.get('REMOTE_ADDR')
    ip_file_path = os.path.join(ips_dir, ip)
    try:
        if os.path.isfile(ip_file_path):
            with open(ip_file_path, 'r') as ip_file:
                ip_data = ip_file.readlines()
            old_start_date = int(ip_data[0])
            if old_start_date + slowdown_reset > now:
                attempts = int(ip_data[1])
                start_date = old_start_date
                wait_period = 2**attempts
                if start_date + wait_period > now:
                    # Never ask the client to wait past the end of the
                    # slowdown window.
                    limit = min(start_date + wait_period,
                                start_date + slowdown_reset)
                    # A refused request leaves the record untouched.
                    rewrite = False
                    remaining_wait = limit - now
                    msg = messages['pleaseWait'] + str(remaining_wait)
                    return bottle.HTTPResponse(
                        msg, 429, {'Retry-After': str(remaining_wait)})
                attempts += 1
    finally:
        # Also runs when parsing the record failed, so an unreadable record
        # is replaced before the exception propagates.
        if rewrite:
            atomic_write(ip_file_path,
                         str(start_date) + '\n' + str(attempts), 'w')

    # Derive page / page file name. The name is used as a file name below,
    # so it must be present, non-empty and free of path syntax.
    page = bottle.request.forms.get('page')
    if not page or '\0' in page or '/' in page or '.' in page \
            or len(page.encode()) > 255:
        return bottle.HTTPResponse(messages['badPageName'], 400)

    # Test captcha. A page without a captcha file is an unknown page, which
    # is the client's error rather than an internal one.
    try:
        with open(os.path.join(captchas_dir, page), 'r') as captcha_file:
            captcha_correct = captcha_file.readline().rstrip()
    except FileNotFoundError:
        return bottle.HTTPResponse(messages['badPageName'], 400)
    captcha_input = bottle.request.forms.get('captcha')
    if captcha_correct != captcha_input:
        return bottle.HTTPResponse(messages['wrongCaptcha'], 400)

    # Record URL. The list file holds one URL per line, so a URL containing
    # a line break is refused whatever the validator makes of it.
    url = bottle.request.forms.get('url')
    if not url or '\n' in url or '\r' in url or not validators.url(url):
        return bottle.HTTPResponse(messages['invalidURL'], 400)
    send_mail(page, url)
    atomic_write(os.path.join(lists_dir, page), url + '\n', 'a')
    url_html = html.escape(url)

    # Response body.
    return messages['recordedURL'] + url_html
139
140
# Debug mode is switched on unconditionally, for both run modes below.
# NOTE(review): presumably meant for development only; confirm before
# deploying behind uWSGI.
bottle.debug(True)
if __name__ != '__main__':
    # uWSGI mode: expose the default Bottle app under both names.
    app = application = bottle.default_app()
else:
    # Non-uWSGI mode: serve on localhost:8080 via bottle.run().
    bottle.run(host='localhost', port=8080)