home · contact · privacy
In Markov text generation, lowercase earlier.
[plomlombot-irc.git] / plomlombot.py
index 8813498594f0a3ca4d453a366eac6ea246154bc7..82f96a21e4c93b7371ef1669f749fd73c96afde0 100755 (executable)
@@ -13,6 +13,8 @@ import hashlib
 import os
 import plomsearch
 
+URLREGEX = r"(https?://[^\s>]+)"  # raw string: \s is a regex escape
+
 # Defaults, may be overwritten by command line arguments.
 SERVER = "irc.freenode.net"
 PORT = 6667
@@ -93,7 +95,7 @@ class IO:
         return line
 
 
-def handle_command(command, argument, notice, target):
+def handle_command(command, argument, notice, target, session):
     hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
     quotesfile_name = "quotes_" + hash_string
 
@@ -183,6 +185,18 @@ def handle_command(command, argument, notice, target):
             shuffle(usable_selections)
             return usable_selections[0][select_length]
 
+        def purge_undesired(tokens):
+            """Return tokens without URLs and without channel user names.
+
+            A new list is built rather than deleting from tokens while
+            looping over it, which skipped the token after each removed
+            URL. Names are compared lowercased, like the tokens are.
+            """
+            names = {name.lower() for name in session.uses_in_chan}
+            url_start = re.compile("^" + URLREGEX)
+            return [token for token in tokens
+                    if token not in names and not url_start.match(token)]
+
         hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
         markovfeed_name = "markovfeed_" + hash_string
         if not os.access(markovfeed_name, os.F_OK):
@@ -193,8 +207,9 @@ def handle_command(command, argument, notice, target):
         file.close()
         tokens = []
         for line in lines:
-            line = line.replace("\n", "")
+            line = line.replace("\n", "").lower()
             tokens += line.split()
+        tokens = purge_undesired(tokens)
         if len(tokens) <= select_length:
             notice("NOT ENOUGH TEXT TO MARKOV.")
             return
@@ -209,13 +224,13 @@ def handle_command(command, argument, notice, target):
         msg = ""
         while 1:
             new_end = markov(snippet)
-            if len(msg) + len(new_end) > 400:
+            if len(msg) + len(new_end) > 200:
                 break
             msg += new_end + " "
             for i in range(select_length - 1):
                 snippet[i] = snippet[i + 1]
             snippet[select_length - 1] = new_end
-        notice(msg.lower() + "malkovich.")
+        notice(msg + "malkovich.")
 
     if "addquote" == command:
         addquote()
@@ -248,7 +263,7 @@ def handle_url(url, notice, show_url=False):
         return
     if mobile_twitter_hack(url):
         return
-    title = bs4.BeautifulSoup(r.text).title
+    title = bs4.BeautifulSoup(r.text, "html.parser").title
     if title:
         prefix = "PAGE TITLE: "
         if show_url:
@@ -263,9 +278,11 @@ class Session:
     def __init__(self, io, username, nickname, channel):
         self.io = io
         self.nickname = nickname
+        self.channel = channel
+        self.uses_in_chan = []
         self.io.send_line("NICK " + self.nickname)
         self.io.send_line("USER " + username + " 0 * : ")
-        self.io.send_line("JOIN " + channel)
+        self.io.send_line("JOIN " + self.channel)
 
     def loop(self):
 
@@ -276,13 +293,13 @@ class Session:
                 def notice(msg):
                     self.io.send_line("NOTICE " + target + " :" + msg)
 
-                matches = re.findall("(https?://[^\s>]+)", msg)
+                matches = re.findall(URLREGEX, msg)
                 for i in range(len(matches)):
                     handle_url(matches[i], notice)
                 if "!" == msg[0]:
                     tokens = msg[1:].split()
                     argument = str.join(" ", tokens[1:])
-                    handle_command(tokens[0], argument, notice, target)
+                    handle_command(tokens[0], argument, notice, target, self)
                     return
                 hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
                 markovfeed_name = "markovfeed_" + hash_string
@@ -308,6 +325,14 @@ class Session:
             msg = str.join(" ", tokens[3:])[1:]
             handle_input(msg, target)
 
+        def name_from_join_or_part(tokens):
+            """Return the nickname out of a ":nick!user@host" prefix token."""
+            token = tokens[0][1:]
+            # str.find() gives -1 on a miss; slicing with that would
+            # wrongly chop the last character, so keep only real hits.
+            cuts = [i for i in (token.find("!"), token.find("@")) if i > 0]
+            return token[:min(cuts)] if cuts else token
+
         while True:
             line = self.io.recv_line()
             if not line:
@@ -318,7 +343,17 @@ class Session:
                     self.io.send_line("PONG " + tokens[1])
                 elif tokens[1] == "PRIVMSG":
                     handle_privmsg(tokens)
-
+                elif tokens[1] == "353":
+                    names = tokens[5:]
+                    names[0] = names[0][1:]
+                    self.uses_in_chan += names
+                elif tokens[1] == "JOIN":
+                    name = name_from_join_or_part(tokens)
+                    if name != self.nickname:
+                        self.uses_in_chan += [name]
+                elif tokens[1] == "PART":
+                    name = name_from_join_or_part(tokens)
+                    del(self.uses_in_chan[self.uses_in_chan.index(name)])
 
 def parse_command_line_arguments():
     parser = argparse.ArgumentParser()