home · contact · privacy
Add quote searching.
author Christian Heller <c.heller@plomlompom.de>
Sat, 23 Jan 2016 21:36:03 +0000 (22:36 +0100)
committer Christian Heller <c.heller@plomlompom.de>
Sat, 23 Jan 2016 21:36:03 +0000 (22:36 +0100)
plomlombot.py
plomsearch.py [new file with mode: 0644]

index e1ec06a4abf82f51a6a60a3a70c3673fe5bf53c5..c32b6dd3c0c42b79f86bb00c96d6f8b7d9017fd2 100755 (executable)
@@ -11,6 +11,7 @@ import bs4
 import random
 import hashlib
 import os
 import random
 import hashlib
 import os
+import plomsearch
 
 # Defaults, may be overwritten by command line arguments.
 SERVER = "irc.freenode.net"
 
 # Defaults, may be overwritten by command line arguments.
 SERVER = "irc.freenode.net"
@@ -167,9 +168,16 @@ def lineparser_loop(io, nickname):
                 quotesfile.close()
                 notice("ADDED QUOTE #" + str(len(lines) - 1))
             elif tokens[0] == "quote":
                 quotesfile.close()
                 notice("ADDED QUOTE #" + str(len(lines) - 1))
             elif tokens[0] == "quote":
-                if len(tokens) > 2 or \
+                if (len(tokens) > 2 and tokens[1] != "search") or \
+                    (len(tokens) < 3 and tokens[1] == "search") or \
                     (len(tokens) == 2 and not tokens[1].isdigit()):
                     (len(tokens) == 2 and not tokens[1].isdigit()):
-                    notice("SYNTAX: !quote [int]")
+                    notice("SYNTAX: !quote [int] OR !quote search QUERY")
+                    notice("QUERY may be a boolean grouping of quoted or "\
+                        + "unquoted search terms, examples:")
+                    notice("!quote search foo")
+                    notice("!quote search foo AND (bar OR NOT baz)")
+                    notice("!quote search \"foo\\\"bar\" AND "\
+                            + "('NOT\"' AND \"'foo'\" OR 'bar\\'baz')")
                     return
                 if not os.access(quotesfile_name, os.F_OK):
                     notice("NO QUOTES AVAILABLE")
                     return
                 if not os.access(quotesfile_name, os.F_OK):
                     notice("NO QUOTES AVAILABLE")
@@ -184,6 +192,20 @@ def lineparser_loop(io, nickname):
                         notice("THERE'S NO QUOTE OF THAT INDEX")
                         return
                     i = i - 1
                         notice("THERE'S NO QUOTE OF THAT INDEX")
                         return
                     i = i - 1
+                elif len(tokens) > 2:
+                    query = str.join(" ", tokens[2:])
+                    try:
+                        results = plomsearch.search(query, lines)
+                    except plomsearch.LogicParserError as err:
+                        notice("FAILED QUERY PARSING: " + str(err))
+                        return
+                    if len(results) == 0:
+                        notice("NO QUOTES MATCHING QUERY")
+                    else:
+                        for result in results:
+                            notice("QUOTE #" + str(result[0] + 1) + " : "
+                                + result[1])
+                    return
                 else:
                     i = random.randrange(len(lines))
                 notice("QUOTE #" + str(i + 1) + ": " + lines[i])
                 else:
                     i = random.randrange(len(lines))
                 notice("QUOTE #" + str(i + 1) + ": " + lines[i])
diff --git a/plomsearch.py b/plomsearch.py
new file mode 100644 (file)
index 0000000..da11750
--- /dev/null
@@ -0,0 +1,239 @@
+class CompoundStatement:
+    """A group of OR'd search terms / sub-statements, optionally negated.
+
+    or_list holds the members of the group; neg is True when the result of
+    OR-ing them is to be inverted.
+    """
+
+    def __init__(self, or_list, negneg=True):
+        # negneg is the polarity handed in by the parser (True = keep the
+        # group's truth value); the instance stores the inverse flag.
+        self.neg = not negneg
+        self.or_list = or_list
+    #def __repr__(self):
+    #    return "<" + str(not self.neg) + ": OR'd " + str(self.or_list) + ">"
+
+class LogicParserError(Exception):
+    """Raised when a search query cannot be parsed into a statement tree."""
+
+def parseToCompoundStatement(string):
+    """Parse a boolean search query into a CompoundStatement tree.
+
+    Search terms may be unquoted or quoted with ' or "; a backslash takes
+    the following character literally. Parentheses group sub-expressions.
+    The words NOT, AND and OR act as operators unless they are quoted. NOT
+    applies to the token or group that follows it, and AND is grouped
+    before OR.
+
+    Returns a CompoundStatement whose or_list contains search strings
+    and/or nested CompoundStatements.
+
+    Raises LogicParserError on an unclosed quote, unbalanced parentheses,
+    an empty query or empty parentheses, or misplaced NOT/AND/OR.
+    """
+    parenthesis_in = "("
+    parenthesis_out = ")"
+    quotes = "'\""
+    escape = '\\'
+    space = " "
+    # Quoted tokens get this marker prepended while parsing, so a quoted
+    # 'AND'/'OR'/'NOT' never compares equal to an operator word. tokenize()
+    # removes the marker character from the raw query first, so it cannot
+    # come from user input; strip_meta_marker() removes it again at the end.
+    meta_marker = "\n"
+    not_words = ["NOT"]
+    and_words = ["AND"]
+    or_words = ["OR"]
+
+    def tokenize(string):
+        """Split the query into parentheses, operator words and terms."""
+        tokens = []
+        string = string.replace(meta_marker, "")
+        quote = ""
+        token = ""
+        parentheses = parenthesis_in + parenthesis_out
+        escaped = False
+        in_token = False
+        for char in string:
+            # An unescaped quote or parenthesis terminates an unquoted
+            # token; the character itself is then handled below.
+            if in_token and quote == "" and char in quotes + parentheses \
+                    and not escaped:
+                in_token = False
+                tokens.append(token)
+                token = ""
+            if not in_token:
+                if char in quotes:
+                    in_token = True
+                    quote = char
+                    token = meta_marker
+                    continue
+                elif char in parentheses:
+                    tokens.append(char)
+                    continue
+                elif char == space:
+                    continue
+                else:
+                    in_token = True
+            if in_token:
+                if not escaped:
+                    if char == escape:
+                        escaped = True
+                        continue
+                    # A quoted token ends at its matching quote, an
+                    # unquoted one at the next space.
+                    if char == quote or (quote == "" and char == space):
+                        if char == quote:
+                            quote = ""
+                        in_token = False
+                        tokens.append(token)
+                        token = ""
+                        continue
+                else:
+                    escaped = False
+                token += char
+        if quote:
+            raise LogicParserError("Token not properly closed.")
+        if in_token:
+            tokens.append(token)
+        return tokens
+
+    def parenthesize(tokens):
+        """Turn the flat token list into nested lists, one per (...) group."""
+        open_parentheses = 0
+        def group_by_parentheses(i):
+            nonlocal open_parentheses
+            compound = []
+            while i < len(tokens):
+                if tokens[i] == parenthesis_in:
+                    open_parentheses += 1
+                    i, token = group_by_parentheses(i + 1)
+                    compound.append(token)
+                elif tokens[i] == parenthesis_out:
+                    open_parentheses -= 1
+                    if open_parentheses < 0:
+                        raise LogicParserError("Improper parentheses.")
+                    return i + 1, compound
+                else:
+                    compound.append(tokens[i])
+                    i += 1
+            return i, compound
+        _, compounds = group_by_parentheses(0)
+        if open_parentheses > 0:
+            raise LogicParserError("Improper parentheses.")
+        return compounds
+
+    def reject_empty_groups(tree, top_level=True):
+        """Raise LogicParserError for an empty query or empty parentheses.
+
+        The grouping passes below index tree[0] unconditionally, so an
+        empty list would otherwise surface as an IndexError that callers
+        catching LogicParserError do not handle.
+        """
+        if not tree:
+            if top_level:
+                raise LogicParserError("Empty query.")
+            raise LogicParserError("Empty parentheses.")
+        for element in tree:
+            if isinstance(element, list):
+                reject_empty_groups(element, False)
+
+    def group_by_negation(tree):
+        """Replace each 'NOT x' pair by [False, x], in place."""
+        i = 0
+        while i < len(tree):
+            if isinstance(tree[i], str) and tree[i] in not_words:
+                if i > len(tree) - 2:
+                    raise LogicParserError("Improper negation.")
+                # NOT A = [False, A]
+                tree[i] = [False, tree[i + 1]]
+                tree.pop(i + 1)
+                if isinstance(tree[i][1], list):
+                    group_by_negation(tree[i][1])
+            elif isinstance(tree[i], list):
+                group_by_negation(tree[i])
+            i += 1
+
+    def group_by_and(tree):
+        """Validate operator placement and fold 'A AND B' pairs, in place."""
+        i = 0
+        # Skip the leading False that marks a negated group.
+        if isinstance(tree[i], bool):
+            i += 1
+        if isinstance(tree[i], list):
+            group_by_and(tree[i])
+        if tree[i] in or_words + and_words:
+            raise LogicParserError("Improper AND/OR placement.")
+        while len(tree[i:]) > 1:
+            if tree[i + 1] not in or_words + and_words:
+                raise LogicParserError("Improper token grouping.")
+            elif len(tree[i:]) < 3 or \
+                    tree[i + 2] in or_words + and_words:
+                raise LogicParserError("Improper AND/OR placement.")
+            if isinstance(tree[i + 2], list):
+                group_by_and(tree[i + 2])
+            if tree[i + 1] in and_words:
+                # A AND B = NOT (NOT A OR NOT B)
+                tree[i] = [False, [[False, tree[i]], [False, tree[i + 2]]]]
+                tree.pop(i + 2)
+                tree.pop(i + 1)
+            else:
+                i += 2
+
+    def group_by_or(tree):
+        """Drop the OR words, leaving lists whose members are implicitly OR'd."""
+        i = 0
+        if isinstance(tree[i], bool):
+            i += 1
+        if isinstance(tree[i], list):
+            group_by_or(tree[i])
+        if tree[i] in or_words:
+            raise LogicParserError("Improper OR placement.")
+        while len(tree[i:]) > 1:
+            if tree[i + 1] in or_words:
+                if isinstance(tree[i + 2], list):
+                    group_by_or(tree[i + 2])
+                tree[i + 1] = tree[i + 2]
+                tree.pop(i + 2)
+            else:
+                if isinstance(tree[i + 1], list):
+                    group_by_or(tree[i + 1])
+                i += 1
+
+    def flatten(tree):
+        """Simplify nested groups; returns the (possibly replaced) tree."""
+        i = 0
+        while i < len(tree):
+            if isinstance(tree[i], list):
+                tree[i] = flatten(tree[i])
+            i += 1
+        if len(tree) == 1 and isinstance(tree[0], list):
+            # ( A ) = A
+            tree = tree[0]
+        if len(tree) == 2 and tree[0] == False \
+                and isinstance(tree[1], list) \
+                and len(tree[1]) == 2 and tree[1][0] == False:
+            # NOT NOT A = A
+            tree = tree[1][1]
+        return tree
+
+    def strip_meta_marker(tree):
+        """Remove the quoted-token marker from every string, in place."""
+        i = 0
+        while i < len(tree):
+            if isinstance(tree[i], list):
+                strip_meta_marker(tree[i])
+            elif isinstance(tree[i], str):
+                tree[i] = tree[i].replace(meta_marker, "")
+            i += 1
+
+    def toCompoundStatement(compounds):
+        """Convert the nested-list tree into CompoundStatement objects."""
+        def transform(tree):
+            negneg = True
+            i = 0
+            or_group = []
+            # A leading False marks the group as negated.
+            if tree[0] == False:
+                negneg = False
+                i = 1
+            while i < len(tree):
+                if isinstance(tree[i], list):
+                    or_group.append(transform(tree[i]))
+                else:
+                    or_group.append(tree[i])
+                i += 1
+            return CompoundStatement(or_group, negneg)
+        return transform(compounds)
+
+    tokens = tokenize(string)
+    compounds = parenthesize(tokens)
+    reject_empty_groups(compounds)
+    group_by_negation(compounds)
+    group_by_and(compounds)
+    group_by_or(compounds)
+    # Only the in-place simplification of nested groups is wanted here. The
+    # return value is deliberately ignored: for the top level it may be a
+    # bare string, which toCompoundStatement() cannot take.
+    flatten(compounds)
+    strip_meta_marker(compounds)
+    return toCompoundStatement(compounds)
+
+def search(query, string_list):
+    """Return the entries of string_list that match the boolean query.
+
+    query is parsed with parseToCompoundStatement(); a plain search term
+    matches an entry when it occurs in it as a substring.
+
+    Returns a list of [index, string] pairs, in the order of string_list.
+    Errors raised by parseToCompoundStatement() for a malformed query are
+    not caught here.
+    """
+
+    def testStringMatchLogic(statement, compare_value):
+        """Evaluate a search term or CompoundStatement against one string."""
+        if isinstance(statement, str):
+            return statement in compare_value
+        if isinstance(statement, CompoundStatement):
+            or_list_true = any(
+                testStringMatchLogic(i_statement, compare_value)
+                for i_statement in statement.or_list)
+            if statement.neg:
+                return not or_list_true
+            return or_list_true
+        # Fail with a clear message instead of an unbound-variable error.
+        raise TypeError("Unsupported statement type: "
+                        + type(statement).__name__)
+
+    statement = parseToCompoundStatement(query)
+    return [[i, line] for i, line in enumerate(string_list)
+            if testStringMatchLogic(statement, line)]
+
+#TEST:
+#lines = [
+#"Hallo Welt,",
+#"wie geht es dir,",
+#"ist heut nicht ein schöner Tag?"
+#]
+#query = "NOT (geht OR 'ö')"
+#for line in search(query, lines):
+#    print(line)