From: Christian Heller Date: Sat, 23 Jan 2016 21:36:03 +0000 (+0100) Subject: Add quote searching. X-Git-Url: https://plomlompom.com/repos/%7B%7Bdb.prefix%7D%7D/%7B%7Bprefix%7D%7D/%7B%7Byoutube_prefix%7D%7D%7B%7Bvideo_id%7D%7D?a=commitdiff_plain;h=7bbd08d498b289f2f3cad8b85c8582aa6ceffc04;p=plomlombot-irc.git Add quote searching. --- diff --git a/plomlombot.py b/plomlombot.py index e1ec06a..c32b6dd 100755 --- a/plomlombot.py +++ b/plomlombot.py @@ -11,6 +11,7 @@ import bs4 import random import hashlib import os +import plomsearch # Defaults, may be overwritten by command line arguments. SERVER = "irc.freenode.net" @@ -167,9 +168,16 @@ def lineparser_loop(io, nickname): quotesfile.close() notice("ADDED QUOTE #" + str(len(lines) - 1)) elif tokens[0] == "quote": - if len(tokens) > 2 or \ + if (len(tokens) > 2 and tokens[1] != "search") or \ + (len(tokens) < 3 and tokens[1] == "search") or \ (len(tokens) == 2 and not tokens[1].isdigit()): - notice("SYNTAX: !quote [int]") + notice("SYNTAX: !quote [int] OR !quote search QUERY") + notice("QUERY may be a boolean grouping of quoted or "\ + + "unquoted search terms, examples:") + notice("!quote search foo") + notice("!quote search foo AND (bar OR NOT baz)") + notice("!quote search \"foo\\\"bar\" AND "\ + + "('NOT\"' AND \"'foo'\" OR 'bar\\'baz')") return if not os.access(quotesfile_name, os.F_OK): notice("NO QUOTES AVAILABLE") @@ -184,6 +192,20 @@ def lineparser_loop(io, nickname): notice("THERE'S NO QUOTE OF THAT INDEX") return i = i - 1 + elif len(tokens) > 2: + query = str.join(" ", tokens[2:]) + try: + results = plomsearch.search(query, lines) + except plomsearch.LogicParserError as err: + notice("FAILED QUERY PARSING: " + str(err)) + return + if len(results) == 0: + notice("NO QUOTES MATCHING QUERY") + else: + for result in results: + notice("QUOTE #" + str(result[0] + 1) + " : " + + result[1]) + return else: i = random.randrange(len(lines)) notice("QUOTE #" + str(i + 1) + ": " + lines[i]) diff --git a/plomsearch.py b/plomsearch.py new file mode 100644 index 0000000..da11750 --- /dev/null +++ b/plomsearch.py @@ -0,0 +1,239 @@ +class CompoundStatement: + def __init__(self, or_list, negneg=True): + self.or_list = or_list + self.neg = False if negneg else True + #def __repr__(self): + # return "<" + str(not self.neg) + ": OR'd " + str(self.or_list) + ">" + +class LogicParserError(Exception): + pass + +def parseToCompoundStatement(string): + parenthesis_in = "(" + parenthesis_out = ")" + quotes = "'\"" + escape = '\\' + space = " " + meta_marker = "\n" + not_words = ["NOT"] + and_words = ["AND"] + or_words = ["OR"] + + def tokenize(string): + tokens = [] + string = string.replace(meta_marker, "") + quote = "" + token = "" + parentheses = parenthesis_in + parenthesis_out + escaped = False + in_token = False + for char in string: + if in_token and quote == "" and char in quotes + parentheses \ + and not escaped: + in_token = False + tokens += [token] + token = "" + if not in_token: + if char in quotes: + in_token = True + quote = char + token = meta_marker + continue + elif char in parentheses: + tokens += [char] + continue + elif char == space: + continue + else: + in_token = True + if in_token: + if not escaped: + if char == escape: + escaped = True + continue + if char == quote or (quote == "" and char == space): + if char == quote: + quote = "" + in_token = False + tokens += [token] + token = "" + continue + else: + escaped = False + token += char + if quote: + raise LogicParserError("Token not properly closed.") + if in_token: + tokens += [token] + return tokens + + def parenthesize(tokens): + open_parentheses = 0 + compounds = [] + def group_by_parentheses(i): + nonlocal open_parentheses + compound = [] + while i < len(tokens): + if tokens[i] == parenthesis_in: + open_parentheses += 1 + i, token = group_by_parentheses(i + 1) + compound += [token] + elif tokens[i] == parenthesis_out: + open_parentheses -= 1 + if open_parentheses < 0: + raise LogicParserError("Improper parentheses.") + return i + 1, compound + else: + compound += [tokens[i]] + i += 1 + return i, compound + _, compounds = group_by_parentheses(0) + if open_parentheses > 0: + raise LogicParserError("Improper parentheses.") + return compounds + + def group_by_negation(tree): + i = 0 + while i < len(tree): + if type(tree[i]) == str and tree[i] in not_words: + if i > len(tree) - 2: + raise LogicParserError("Improper negation.") + # NOT A = [False, A] + tree[i] = [False, tree[i + 1]] + tree.pop(i + 1) + if type(tree[i][1]) == list: + group_by_negation(tree[i][1]) + elif type(tree[i]) == list: + group_by_negation(tree[i]) + i += 1 + + def group_by_and(tree): + i = 0 + if type(tree[i]) == bool: + i += 1 + if type(tree[i]) == list: + group_by_and(tree[i]) + if tree[i] in or_words + and_words: + raise LogicParserError("Improper AND/OR placement.") + while len(tree[i:]) > 1: + if tree[i + 1] not in or_words + and_words: + raise LogicParserError("Improper token grouping.") + elif len(tree[i:]) < 3 or \ + tree[i + 2] in or_words + and_words: + raise LogicParserError("Improper AND/OR placement.") + if type(tree[i + 2]) == list: + group_by_and(tree[i + 2]) + if tree[i + 1] in and_words: + # A AND B = NOT (NOT A OR NOT B) + tree[i] = [False, [[False, tree[i]], [False, tree[i + 2]]]] + tree.pop(i + 2) + tree.pop(i + 1) + else: + i += 2 + + def group_by_or(tree): + i = 0 + if type(tree[i]) == bool: + i += 1 + if type(tree[i]) == list: + group_by_or(tree[i]) + if tree[i] in or_words: + raise LogicParserError("Improper OR placement.") + while len(tree[i:]) > 1: + if tree[i + 1] in or_words: + if type(tree[i + 2]) == list: + group_by_or(tree[i + 2]) + tree[i + 1] = tree[i + 2] + tree.pop(i + 2) + else: + if type(tree[i + 1]) == list: + group_by_or(tree[i + 1]) + i += 1 + + def flatten(tree): + i = 0 + while i < len(tree): + if type(tree[i]) == list: + tree[i] = flatten(tree[i]) + i += 1 + if len(tree) == 1 and type(tree[0]) == list: + # ( A ) = A + tree = tree[0] + if len(tree) == 2 and tree[0] == False and type(tree[1]) == list \ + and len(tree[1]) == 2 and tree[1][0] == False: + # NOT NOT A = A + tree = tree[1][1] + return tree + + def strip_meta_marker(tree): + i = 0 + while i < len(tree): + if type(tree[i]) == list: + strip_meta_marker(tree[i]) + elif type(tree[i]) == str: + tree[i] = tree[i].replace(meta_marker, "") + i += 1 + + def toCompoundStatement(compounds): + def transform(tree): + negneg = True + i = 0 + or_group = [] + if tree[0] == False: + negneg = False + i = 1 + while i < len(tree): + if type(tree[i]) == list: + or_group += [transform(tree[i])] + else: + or_group += [tree[i]] + i += 1 + return CompoundStatement(or_group, negneg) + return transform(compounds) + + tokens = tokenize(string) + compounds = parenthesize(tokens) + group_by_negation(compounds) + group_by_and(compounds) + group_by_or(compounds) + flatten(compounds) + strip_meta_marker(compounds) + return toCompoundStatement(compounds) + +def search(query, string_list): + + def testStringMatchLogic(statement, compare_value): + if type(statement) == str: + statement_true = statement in compare_value + elif type(statement) == CompoundStatement: + or_list_true = False + if len(statement.or_list) > 1: + for i_statement in statement.or_list: + if testStringMatchLogic(i_statement, compare_value): + or_list_true = True + break + else: + or_list_true = testStringMatchLogic(statement.or_list[0], + compare_value) + if statement.neg: + statement_true = not or_list_true + else: + statement_true = or_list_true + return statement_true + + results = [] + statement = parseToCompoundStatement(query) + for i in range(len(string_list)): + if testStringMatchLogic(statement, string_list[i]): + results += [[i, string_list[i]]] + return results + +#TEST: +#lines = [ +#"Hallo Welt,", +#"wie geht es dir,", +#"ist heut nicht ein schöner Tag?" +#] +#query = "NOT (geht OR 'ö')" +#for line in search(query, lines): +# print(line)