home · contact · privacy
Cancel page title reading on bad charset.
[plomlombot-irc.git] / plomlombot.py
index 81be31fdb5f438349469e7d63afac9b4b20b1a7e..8ac8f01c00cf0cc4e00ad6e4a664a743926ac2f6 100755 (executable)
@@ -67,7 +67,11 @@ class IO:
                 self._pingtest(send_ping)
                 return None
             self.last_pong = time.time()
                 self._pingtest(send_ping)
                 return None
             self.last_pong = time.time()
-            received_runes = self.socket.recv(1024).decode("UTF-8")
+            received_bytes = self.socket.recv(1024)
+            try:
+                received_runes = received_bytes.decode("UTF-8")
+            except UnicodeDecodeError:
+                received_runes = received_bytes.decode("latin1")
             if len(received_runes) == 0:
                 print("SOCKET CONNECTION BROKEN")
                 raise ExceptionForRestart
             if len(received_runes) == 0:
                 print("SOCKET CONNECTION BROKEN")
                 raise ExceptionForRestart
@@ -118,7 +122,8 @@ def lineparser_loop(io, nickname):
                     continue
                 charset = webpage.info().get_content_charset()
                 if not charset:
                     continue
                 charset = webpage.info().get_content_charset()
                 if not charset:
-                    charset = "utf-8"
+                    notice("TROUBLE READING PAGE TITLE: no charset in header")
+                    continue
                 content_type = webpage.info().get_content_type()
                 if content_type not in ('text/html', 'text/xml',
                                         'application/xhtml+xml'):
                 content_type = webpage.info().get_content_type()
                 if content_type not in ('text/html', 'text/xml',
                                         'application/xhtml+xml'):