home
·
contact
·
privacy
projects
/
plomlombot-irc.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Be verbose on page title retrieval failure.
[plomlombot-irc.git]
/
plomlombot.py
diff --git
a/plomlombot.py
b/plomlombot.py
index a47158c302a3341cbe3e3777410b6e073c0a485d..81be31fdb5f438349469e7d63afac9b4b20b1a7e 100755
(executable)
--- a/
plomlombot.py
+++ b/
plomlombot.py
@@
-7,6
+7,7
@@
import select
import time
import re
import urllib.request
import time
import re
import urllib.request
+import http.client
import html
# Defaults, may be overwritten by command line arguments.
import html
# Defaults, may be overwritten by command line arguments.
@@
-99,14
+100,21
@@
def lineparser_loop(io, nickname):
def act_on_privmsg(tokens):
def url_check(msg):
def act_on_privmsg(tokens):
def url_check(msg):
- matches = re.findall("(https?://[^\s]+)", msg)
+
+ def notice(msg):
+ io.send_line("NOTICE " + target + " :" + msg)
+
+ matches = re.findall("(https?://[^\s>]+)", msg)
for i in range(len(matches)):
url = matches[i]
for i in range(len(matches)):
url = matches[i]
+ request = urllib.request.Request(url, headers={
+ "User-Agent": "plomlombot"
+ })
try:
try:
-                webpage = urllib.request.urlopen(url, timeout=15)
+                webpage = urllib.request.urlopen(request, timeout=15)
except (urllib.error.HTTPError, urllib.error.URLError,
except (urllib.error.HTTPError, urllib.error.URLError,
-                    UnicodeError) as error:
-                print("TROUBLE FOLLOWING URL: " + str(error))
+                    UnicodeError, http.client.BadStatusLine) as error:
+                notice("TROUBLE FOLLOWING URL: " + str(error))
continue
charset = webpage.info().get_content_charset()
if not charset:
continue
charset = webpage.info().get_content_charset()
if not charset:
@@
-114,14
+122,13
@@
def lineparser_loop(io, nickname):
content_type = webpage.info().get_content_type()
if content_type not in ('text/html', 'text/xml',
'application/xhtml+xml'):
content_type = webpage.info().get_content_type()
if content_type not in ('text/html', 'text/xml',
'application/xhtml+xml'):
-                print("TROUBLE INTERPRETING URL: bad content type "
-                      + content_type)
+                notice("TROUBLE READING PAGE TITLE: bad content type "
+                       + content_type)
continue
content = webpage.read().decode(charset)
title = str(content).split('<title>')[1].split('</title>')[0]
title = html.unescape(title)
continue
content = webpage.read().decode(charset)
title = str(content).split('<title>')[1].split('</title>')[0]
title = html.unescape(title)
- io.send_line("PRIVMSG " + target + " :page title for url: "
- + title)
+ notice("PAGE TITLE FOR URL: " + title)
sender = ""
for rune in tokens[0]:
sender = ""
for rune in tokens[0]:
@@
-141,7
+148,7
@@
def lineparser_loop(io, nickname):
msg = str.join(" ", tokens[3:])[1:]
url_check(msg)
msg = str.join(" ", tokens[3:])[1:]
url_check(msg)
-    while 1:
+    while True:
line = io.recv_line()
if not line:
continue
line = io.recv_line()
if not line:
continue
@@
-177,7
+184,7
@@
def parse_command_line_arguments():
return opts
opts = parse_command_line_arguments()
return opts
opts = parse_command_line_arguments()
-while 1:
+while True:
try:
io = init_session(opts.server, opts.port, opts.timeout, opts.nickname,
opts.username, opts.CHANNEL)
try:
io = init_session(opts.server, opts.port, opts.timeout, opts.nickname,
opts.username, opts.CHANNEL)