import select
import time
import re
-import urllib.request
-import http.client
-import html
+import requests
+import bs4
+import random
+import hashlib
+import os
# Defaults, may be overwritten by command line arguments.
SERVER = "irc.freenode.net"
def act_on_privmsg(tokens):
+    # Send an IRC NOTICE with the given text to the current target
+    # (channel or nick); hoisted out of url_check so command_check can
+    # reuse it.  Relies on the enclosing scope's `io` and `target`.
+    def notice(msg):
+        io.send_line("NOTICE " + target + " :" + msg)
+
    def url_check(msg):
-        def notice(msg):
-            io.send_line("NOTICE " + target + " :" + msg)
+        # Fetch every http(s) URL found in msg and NOTICE its page title
+        # back to the target.
+        def handle_url(url):
+
+            # Canonicalize mobile.twitter.com status links to twitter.com
+            # and re-handle the rewritten URL; returns True when a rewrite
+            # happened (caller then skips its own title fetch).
+            # NOTE(review): both patterns are non-raw strings and the dots
+            # in 'mobile.twitter.com' are unescaped -- consider r'' literals
+            # with escaped dots; confirm intended matching.
+            def mobile_twitter_hack(url):
+                re1 = 'https?://(mobile.twitter.com/)[^/]+(/status/)'
+                re2 = 'https?://mobile.twitter.com/([^/]+)/status/([^\?]+)'
+                m = re.search(re1, url)
+                if m and m.group(1) == 'mobile.twitter.com/' \
+                        and m.group(2) == '/status/':
+                    m = re.search(re2, url)
+                    url = 'https://twitter.com/' + m.group(1) + '/status/' \
+                          + m.group(2)
+                    # Recurse so the canonical URL's title is fetched.
+                    handle_url(url)
+                    return True
-        matches = re.findall("(https?://[^\s>]+)", msg)
-        for i in range(len(matches)):
-            url = matches[i]
-            request = urllib.request.Request(url, headers={
-                "User-Agent": "plomlombot"
-            })
            try:
-                webpage = urllib.request.urlopen(request, timeout=15)
-            except (urllib.error.HTTPError, urllib.error.URLError,
-                    UnicodeError, http.client.BadStatusLine) as error:
+                # NOTE(review): requests.exceptions.Timeout (and
+                # MissingSchema) are not in the except tuple below, so a
+                # slow host raises out of handle_url -- confirm intended.
+                r = requests.get(url, timeout=15)
+            except (requests.exceptions.TooManyRedirects,
+                    requests.exceptions.ConnectionError,
+                    requests.exceptions.InvalidURL,
+                    requests.exceptions.InvalidSchema) as error:
                notice("TROUBLE FOLLOWING URL: " + str(error))
-                continue
-            charset = webpage.info().get_content_charset()
-            if not charset:
-                notice("TROUBLE READING PAGE TITLE: no charset in header")
-                continue
-            content_type = webpage.info().get_content_type()
-            if content_type not in ('text/html', 'text/xml',
-                                    'application/xhtml+xml'):
-                notice("TROUBLE READING PAGE TITLE: bad content type "
-                       + content_type)
-                continue
-            content = webpage.read().decode(charset)
-            title = str(content).split('<title>')[1].split('</title>')[0]
-            title = html.unescape(title)
-            notice("PAGE TITLE FOR URL: " + title)
+                return
+            if mobile_twitter_hack(url):
+                return
+            # NOTE(review): BeautifulSoup is called without an explicit
+            # parser (emits a warning in newer bs4), and title.string can
+            # be None when the <title> tag has nested markup, which would
+            # make .strip() raise -- confirm acceptable for this bot.
+            title = bs4.BeautifulSoup(r.text).title
+            if title:
+                notice("PAGE TITLE: " + title.string.strip())
+            else:
+                notice("PAGE HAS NO TITLE TAG")
+
+        matches = re.findall("(https?://[^\s>]+)", msg)
+        for i in range(len(matches)):
+            handle_url(matches[i])
+
+    # Parse and execute "!"-prefixed bot commands: !addquote appends a
+    # quote, !quote [int] retrieves one (random when no index is given).
+    # Quotes live in a per-target file named by the md5 of the target.
+    # NOTE(review): an empty msg raises IndexError on msg[0], and a bare
+    # "!" raises on tokens[0] -- confirm callers never pass those.
+    def command_check(msg):
+        if msg[0] != "!":
+            return
+        tokens = msg[1:].split()
+        # md5 is used only to derive a filesystem-safe filename from the
+        # channel/nick name, not for anything security-sensitive.
+        hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
+        quotesfile_name = "quotes_" + hash_string
+        if tokens[0] == "addquote":
+            # First use for this target: create the file with a header
+            # line (the header is excluded from quote numbering below).
+            if not os.access(quotesfile_name, os.F_OK):
+                quotesfile = open(quotesfile_name, "w")
+                quotesfile.write("QUOTES FOR " + target + ":\n")
+                quotesfile.close()
+            quotesfile = open(quotesfile_name, "a")
+            quotesfile.write(str.join(" ", tokens[1:]) + "\n")
+            quotesfile.close()
+            # Re-read to report the new quote's 1-based index
+            # (file length minus the header line).
+            quotesfile = open(quotesfile_name, "r")
+            lines = quotesfile.readlines()
+            quotesfile.close()
+            notice("ADDED QUOTE #" + str(len(lines) - 1))
+        elif tokens[0] == "quote":
+            if len(tokens) > 2 or \
+                    (len(tokens) == 2 and not tokens[1].isdigit()):
+                notice("SYNTAX: !quote [int]")
+                return
+            if not os.access(quotesfile_name, os.F_OK):
+                notice("NO QUOTES AVAILABLE")
+                return
+            quotesfile = open(quotesfile_name, "r")
+            lines = quotesfile.readlines()
+            quotesfile.close()
+            # Drop the header line; quotes are presented 1-indexed.
+            lines = lines[1:]
+            if len(tokens) == 2:
+                i = int(tokens[1])
+                # isdigit() already rejected negatives; 0 and out-of-range
+                # indexes are refused here.
+                if i == 0 or i > len(lines):
+                    notice("THERE'S NO QUOTE OF THAT INDEX")
+                    return
+                i = i - 1
+            else:
+                i = random.randrange(len(lines))
+            # NOTE(review): lines[i] keeps its trailing "\n" from
+            # readlines(), so the NOTICE text ends with a newline --
+            # confirm the IRC send layer tolerates/strips that.
+            notice("QUOTE #" + str(i + 1) + ": " + lines[i])
sender = ""
for rune in tokens[0]:
if receiver != nickname:
target = receiver
msg = str.join(" ", tokens[3:])[1:]
+ command_check(msg)
url_check(msg)
while True: