From: Christian Heller Date: Thu, 4 Feb 2016 02:12:01 +0000 (+0100) Subject: Strip URLs from markov texts. X-Git-Url: https://plomlompom.com/repos/?p=plomlombot-irc.git;a=commitdiff_plain;h=f3d948172f8b7c60c91621cd0adb2f0c9f0e37db Strip URLs from markov texts. --- diff --git a/plomlombot.py b/plomlombot.py index 9197592..c8c11f4 100755 --- a/plomlombot.py +++ b/plomlombot.py @@ -13,6 +13,8 @@ import hashlib import os import plomsearch +URLREGEX = "(https?://[^\s>]+)" + # Defaults, may be overwritten by command line arguments. SERVER = "irc.freenode.net" PORT = 6667 @@ -183,7 +185,10 @@ def handle_command(command, argument, notice, target, session): shuffle(usable_selections) return usable_selections[0][select_length] - def purge_present_users(tokens): + def purge_undesired(tokens): + for token in tokens: + if None != re.match("^" + URLREGEX, token): + del(tokens[tokens.index(token)]) for name in session.uses_in_chan: while True: try: @@ -204,7 +209,7 @@ def handle_command(command, argument, notice, target, session): for line in lines: line = line.replace("\n", "") tokens += line.split() - tokens = purge_present_users(tokens) + tokens = purge_undesired(tokens) if len(tokens) <= select_length: notice("NOT ENOUGH TEXT TO MARKOV.") return @@ -288,7 +293,7 @@ class Session: def notice(msg): self.io.send_line("NOTICE " + target + " :" + msg) - matches = re.findall("(https?://[^\s>]+)", msg) + matches = re.findall(URLREGEX, msg) for i in range(len(matches)): handle_url(matches[i], notice) if "!" == msg[0]: