home
·
contact
·
privacy
projects
/
plomlombot-irc.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
9a7e292
)
Assume default charset utf-8 when no charset found in URL interpreter.
author
Christian Heller
<c.heller@plomlompom.de>
Sun, 17 Jan 2016 20:21:17 +0000
(21:21 +0100)
committer
Christian Heller
<c.heller@plomlompom.de>
Sun, 17 Jan 2016 20:21:17 +0000
(21:21 +0100)
plomlombot.py
patch
|
blob
|
history
diff --git a/plomlombot.py b/plomlombot.py
index 36228f31b2ccafe949d381958169077a9e6bf77a..e4a5be35aabd091204209eff379a542f206c7e4e 100644
(file)
--- a/plomlombot.py
+++ b/plomlombot.py
@@ -78,8 +78,11 @@ def url_check(msg):
webpage = urllib.request.urlopen(url, timeout=15)
content_type = webpage.info().get_content_type()
charset = webpage.info().get_content_charset()
- if not charset or not content_type in ('text/html', 'text/xml',
+ if not charset:
+ charset="utf-8"
+ if not content_type in ('text/html', 'text/xml',
'application/xhtml+xml'):
+ print("TROUBLE INTERPRETING URL: bad content_type " + content_type)
continue
content = webpage.read().decode(charset)
title = str(content).split('<title>')[1].split('</title>')[0]