home
·
contact
·
privacy
projects
/
plomlombot-irc.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Cancel page title reading on bad charset.
[plomlombot-irc.git]
/
plomlombot.py
diff --git a/plomlombot.py b/plomlombot.py
index 5fcd517a95f563cfba7a00146d1e6359780a0472..8ac8f01c00cf0cc4e00ad6e4a664a743926ac2f6 100755
(executable)
--- a/plomlombot.py
+++ b/plomlombot.py
@@ -7,6 +7,7 @@
import select
import time
import re
import urllib.request
import time
import re
import urllib.request
+import http.client
import html
# Defaults, may be overwritten by command line arguments.
import html
# Defaults, may be overwritten by command line arguments.
@@ -66,7 +67,11 @@ class IO:
self._pingtest(send_ping)
return None
self.last_pong = time.time()
self._pingtest(send_ping)
return None
self.last_pong = time.time()
- received_runes = self.socket.recv(1024).decode("UTF-8")
+ received_bytes = self.socket.recv(1024)
+ try:
+ received_runes = received_bytes.decode("UTF-8")
+ except UnicodeDecodeError:
+ received_runes = received_bytes.decode("latin1")
if len(received_runes) == 0:
print("SOCKET CONNECTION BROKEN")
raise ExceptionForRestart
if len(received_runes) == 0:
print("SOCKET CONNECTION BROKEN")
raise ExceptionForRestart
@@ -99,29 +104,36 @@ def lineparser_loop(io, nickname):
def act_on_privmsg(tokens):
def url_check(msg):
def act_on_privmsg(tokens):
def url_check(msg):
- matches = re.findall("(https?://[^\s]+)", msg)
+
+ def notice(msg):
+ io.send_line("NOTICE " + target + " :" + msg)
+
+ matches = re.findall("(https?://[^\s>]+)", msg)
for i in range(len(matches)):
url = matches[i]
for i in range(len(matches)):
url = matches[i]
+ request = urllib.request.Request(url, headers={
+ "User-Agent": "plomlombot"
+ })
try:
try:
- webpage = urllib.request.urlopen(url, timeout=15)
+ webpage = urllib.request.urlopen(request, timeout=15)
except (urllib.error.HTTPError, urllib.error.URLError,
except (urllib.error.HTTPError, urllib.error.URLError,
- UnicodeError) as error:
- print("TROUBLE FOLLOWING URL: " + str(error))
+ UnicodeError, http.client.BadStatusLine) as error:
+ notice("TROUBLE FOLLOWING URL: " + str(error))
continue
charset = webpage.info().get_content_charset()
if not charset:
continue
charset = webpage.info().get_content_charset()
if not charset:
- charset = "utf-8"
+ notice("TROUBLE READING PAGE TITLE: no charset in header")
+ continue
content_type = webpage.info().get_content_type()
if content_type not in ('text/html', 'text/xml',
'application/xhtml+xml'):
content_type = webpage.info().get_content_type()
if content_type not in ('text/html', 'text/xml',
'application/xhtml+xml'):
- print("TROUBLE INTERPRETING URL: bad content type "
- + content_type)
+ notice("TROUBLE READING PAGE TITLE: bad content type "
+ + content_type)
continue
content = webpage.read().decode(charset)
title = str(content).split('<title>')[1].split('</title>')[0]
title = html.unescape(title)
continue
content = webpage.read().decode(charset)
title = str(content).split('<title>')[1].split('</title>')[0]
title = html.unescape(title)
- io.send_line("PRIVMSG " + target + " :page title for url: "
- + title)
+ notice("PAGE TITLE FOR URL: " + title)
sender = ""
for rune in tokens[0]:
sender = ""
for rune in tokens[0]: