"""
Common Utilities for parsing and testing
"""
__author__ = "Alex Kislev, Emma Li, Peter Makarov"
__version__ = "1.0"
__date__ = "Tue, 24 Aug 2010"
import re
from nltk import load_parser
from .temporaldrt import DrtVariableExpression, unique_variable, NewInfoDRS
from .presuppdrt import ResolutionException, DrtParser as PresuppDrtParser
from .presuppdrt import DrtConcatenation, DrtExpression, DRS
from types import LambdaType
from nltk.sem.logic import AndExpression, LogicalExpressionException, LogicParser
from nltk.sem import drt
from .inference import inference_check, get_bk, AdmissibilityError, ConsistencyError, InformativityError
class UngrammaticalException(Exception):
    """Raised by ``Tester.parse`` when the grammar yields no parse tree for a sentence."""
class FailedReading(Exception):
    """Raised by ``Tester.test`` when a produced reading differs from the expected one."""
class ComparisonFailed(Exception):
    """Raised by ``Tester.test`` when the number of readings differs from the number expected."""
class NoReadingProduced(Exception):
    """Raised by ``Tester.test`` when readings were expected but none were produced."""
class Tester(object):
    """Parse English text into DRSs and check the readings that result.

    An instance bundles a feature-grammar parser (``nltk.load_parser``) with
    a DRT parser, the presupposition DRT parser and a plain logic parser.
    ``test`` compares resolved readings against expected DRS strings;
    ``interpret`` / ``inference_test`` additionally run the inference check
    against background knowledge.
    """

    # Numeric judgement codes used in ``inference_test`` cases, mapped to the
    # error type each code stands for.
    INFERROR = {
        3: AdmissibilityError,
        2: InformativityError,
        1: ConsistencyError,
    }

    # Token separators: a space, a comma followed by a space, or a bare comma.
    WORD_SPLIT = re.compile(" |, |,")

    # Words after which the *following* word is kept verbatim.  The
    # alternation is grouped so that ``^``/``$`` apply to every branch;
    # ungrouped, ``is``/``was``/``not`` matched any word merely starting
    # with them (e.g. "nothing", "washed").
    EXCLUDED_NEXT = re.compile(r"^(?:ha[sd]|is|was|not|will)$")

    # Words that are never rewritten by SUBSTITUTIONS (same grouping fix:
    # previously "history" or "reduced" were excluded by prefix).
    EXCLUDED = re.compile(r"^(?:does|h?is|red|[a-z]+ness)$")

    # (pattern, replacement) pairs tried in order; the first match wins.
    # A replacement is either a tuple of tokens or a callable taking the
    # match object and returning such a tuple.
    SUBSTITUTIONS = [
        (re.compile("^died$"), ("did", "die")),
        (re.compile("^([A-Z][a-z]+)'s?$"), lambda m: (m.group(1), "s")),
        (re.compile("^(?P<stem>[a-z]+)s$"), lambda m: ("does", m.group("stem"))),
        # Both branches are now anchored at both ends; before, the first
        # branch lacked ``$`` and rewrote words that merely contain "ed"
        # ("needle", "credit").
        (re.compile(r"^(?:([a-z]+(?:[^cvklt]|lk|nt))ed|([a-z]+[cvlkt]e)d)$"),
         lambda m: ("did", m.group(1) if m.group(1) else m.group(2))),
        (re.compile("^([A-Z]?[a-z]+)one$"), lambda m: (m.group(1), "one")),
        (re.compile("^([A-Z]?[a-z]+)thing$"), lambda m: (m.group(1), "thing")),
        (re.compile("^bit$"), ("did", "bite")),
        (re.compile("^bought$"), ("did", "buy")),
        (re.compile("^wrote$"), ("did", "write")),
    ]

    def __init__(self, grammar, drt_parser, subtests=None):
        """Load ``grammar`` and set up the parsers.

        :param grammar: name of a feature grammar; must end in ``.fcfg``.
        :param drt_parser: zero-argument callable (typically a parser class)
            producing the DRT parser handed to ``load_parser``.
        :param subtests: optional object offering ``test(msg=..., i=...)``
            as a context manager; used by ``test`` to wrap each check.
            When ``None``, failures in ``test`` propagate directly.
        """
        assert isinstance(grammar, str) and grammar.endswith('.fcfg'), \
            "%s is not a grammar name" % grammar
        self.drt_parser = drt_parser()
        self.presupp_parser = PresuppDrtParser()
        self.logic_parser = LogicParser()
        self.parser = load_parser(grammar, logic_parser=self.drt_parser)
        self.subtests = subtests

    def _subtest(self, index):
        """Return the context manager wrapping one check performed by ``test``.

        Falls back to a no-op context when no ``subtests`` object was given,
        so that ``test`` does not fail with ``AttributeError`` on ``None``.
        """
        if self.subtests is None:
            from contextlib import nullcontext
            return nullcontext()
        return self.subtests.test(msg="seed", i=index)

    def _split(self, sentence):
        """Tokenise ``sentence`` and apply the SUBSTITUTIONS rewrites.

        A word matching EXCLUDED_NEXT is kept as is and protects the word
        that follows it; a word matching EXCLUDED is kept as is.  Every
        other word is replaced by the tokens of the first matching
        substitution, or kept unchanged when none matches.

        :return: list of token strings.
        """
        words = []
        exclude_next = False
        for word in Tester.WORD_SPLIT.split(sentence):
            if Tester.EXCLUDED_NEXT.match(word):
                exclude_next = True
                words.append(word)
                continue
            if exclude_next or Tester.EXCLUDED.match(word):
                exclude_next = False
                words.append(word)
                continue
            for pattern, replacement in Tester.SUBSTITUTIONS:
                match = pattern.match(word)
                if match:
                    words.extend(replacement(match) if callable(replacement) else replacement)
                    break
            else:
                # No substitution applied.
                words.append(word)
        return words

    def parse(self, text, **args) -> DrtExpression:
        """Parse ``text`` sentence by sentence into one simplified DRS.

        Sentences are separated by ``.``; the semantics of the first parse
        tree of each sentence is concatenated onto the running DRS.

        :param text: one or more sentences.
        :param args: ``utter`` (default True) starts from ``DRS([n],[])``
            instead of an empty discourse; ``verbose`` (default False)
            prints tokens and intermediate DRSs.
        :raises UngrammaticalException: if a sentence has no parse tree.
        """
        sentences = text.split('.')
        utter = args.get("utter", True)
        verbose = args.get("verbose", False)
        drs = (utter and self.drt_parser.parse('DRS([n],[])')) or []
        for sentence in sentences:
            sentence = sentence.lstrip()
            if not sentence:
                continue
            words = self._split(sentence)
            if verbose:
                print(words)
            trees = list(self.parser.parse(words))
            try:
                new_drs = trees[0].label()["SEM"].simplify()
            except IndexError:
                raise UngrammaticalException()
            if verbose:
                print(new_drs)
            if drs:
                drs = (drs + new_drs).simplify()
            else:
                drs = new_drs
        if verbose:
            print(drs)
        return drs

    def test(self, cases, **args):
        """Check that each case resolves to the expected reading(s).

        :param cases: iterable of ``(number, sentence, expected)`` where
            ``expected`` is a DRS string, a list of them, or a false value
            when no reading is expected.
        :param args: forwarded to ``parse``; ``verbose`` is also passed to
            the presupposition parser and to ``resolve``.
        :raises NoReadingProduced: readings were expected but none produced.
        :raises FailedReading: a reading differs from its expected DRS.
        :raises ComparisonFailed: reading count differs from expected count.

        Each check runs inside ``self.subtests.test(...)`` when a
        ``subtests`` object was supplied; otherwise the exception propagates.
        """
        verbose = args.get("verbose", False)
        i = 0
        for number, sentence, expected in cases:
            expected_drs = []
            if expected:
                items = expected if isinstance(expected, list) else [expected]
                expected_drs = [self.presupp_parser.parse(item, verbose) for item in items]
            expression = self.parse(sentence, **args)
            readings = []
            try:
                readings, _errors = expression.resolve(lambda x: (True, None), verbose)
            except ResolutionException:
                # Treated as "no reading"; reported below if one was expected.
                pass
            if not readings and expected:
                with self._subtest(i):
                    i += 1
                    raise NoReadingProduced(f"{number}. No reading produced, but expected in test!")
            elif not readings and not expected:
                pass
            elif len(expected_drs) == len(readings):
                for index, pair in enumerate(zip(expected_drs, readings)):
                    with self._subtest(i):
                        i += 1
                        if pair[0] == pair[1]:
                            print(("%s. %s -- Reading (%s): %s\n" % (number, sentence, index + 1, pair[1])))
                        else:
                            raise FailedReading(("%s. !!!failed reading (%s)!!!\n\n%s\n\nExpected:\t%s\n\nReturns:\t%s\n" %
                                                 (number, index + 1, sentence, pair[0], pair[1])))
            else:
                with self._subtest(i):
                    i += 1
                    msg = f"{number}. {sentence} \n"
                    msg += "!!! comparison failed !!! \n\n Expected: \n " + '\n'.join(str(x) for x in expected_drs) + "\n\n"
                    msg += "Got:\n" + '\n'.join(str(x) for x in readings)
                    raise ComparisonFailed(msg)

    def interpret(self, expr_1, expr_2, background=None, verbose=True, test=False):
        """Interpret ``expr_2`` against the previous discourse ``expr_1``.

        Both expressions are parsed, then handed to ``interpret_new``, which
        generates background knowledge and runs the inference check on the
        readings produced by ``resolve()``.

        :param expr_1: previous discourse as a string, or a false value.
        :param expr_2: the new sentence(s) as a string.
        :param background: optional dict of background knowledge.
        :param test: when true, return ``(interpretations, errors)`` instead
            of just ``interpretations``.
        :return: the admissible interpretations (and errors when ``test``);
            ``None`` after an IndexError/ValueError has been reported here.
        """
        assert(not expr_1 or isinstance(expr_1, str)), "Expression %s is not a string" % expr_1
        assert(isinstance(expr_2, str)), "Expression %s is not a string" % expr_2
        assert(not background or isinstance(background, dict)), "Background knowledge is not in dictionary format"
        try:
            if expr_1:
                discourse = self.parse(expr_1, utter=True)
                expression = self.parse_new(discourse, expr_2)
            else:
                discourse = None
                expression = self.parse(expr_2, utter=True)
            result = self.interpret_new(discourse, expression, background=background, verbose=verbose)
            if result is None:
                # interpret_new already reported the problem.
                interpretations, errors = [], []
            else:
                # resolve() yields a (readings, errors) pair, as unpacked in
                # test(); wrapping the pair in a list hid the errors.
                interpretations, errors = result
            if test:
                return interpretations, errors
            return interpretations
        except IndexError:
            print("Input sentences only!")
        except ValueError as e:
            print("Error:", e)

    def collect_background(self, discourse, background, verbose=True):
        """Conjoin the background formulas relevant to ``discourse``.

        Each formula from ``get_bk`` is parsed with the presupposition DRT
        parser (and converted with ``fol()``), falling back to the plain
        logic parser.  A formula neither parser accepts is reported and
        skipped.

        :return: a single conjunction of the parsed formulas, or ``None``
            when there are none.
        """
        background_knowledge = None
        for formula in get_bk(discourse, background):
            try:
                parsed_formula = self.presupp_parser.parse(formula).fol()
            except LogicalExpressionException:
                try:
                    parsed_formula = self.logic_parser.parse(formula)
                except Exception as e:
                    print("Error: %s" % e)
                    # Skip it: otherwise parsed_formula is unbound, or still
                    # holds the previous formula and would be conjoined twice.
                    continue
            if background_knowledge:
                background_knowledge = AndExpression(background_knowledge, parsed_formula)
            else:
                background_knowledge = parsed_formula
        if verbose:
            print("Generated background knowledge:\n%s" % background_knowledge)
        return background_knowledge

    def parse_new(self, discourse, expression_str):
        """Parse ``expression_str`` and rename referents clashing with ``discourse``.

        Every referent shared with ``discourse`` is replaced by a fresh
        variable so that the new expression has unique variables.
        """
        expression = self.parse(expression_str, utter=False)
        shared_refs = set(expression.get_refs(True)) & set(discourse.get_refs(True))
        for ref in shared_refs:
            newref = DrtVariableExpression(unique_variable(ref))
            expression = expression.replace(ref, newref, True)
        return expression

    def interpret_new(self, discourse, expression, background=None, verbose=True):
        """Resolve ``expression`` in the context of ``discourse``.

        The new expression is wrapped in a ``NewInfoDRS`` and merged with the
        discourse (or used alone when there is none); relevant background
        knowledge is collected, and ``resolve()`` is run with the inference
        check as its filter.

        :return: whatever ``resolve()`` returns (a ``(readings, errors)``
            pair, judging by its use in ``test``); ``None`` after an
            IndexError/ValueError has been reported here.
        """
        try:
            if discourse:
                new_discourse = (NewInfoDRS([], [expression]) + discourse).simplify()
            else:
                new_discourse = expression.simplify()
            if background:
                background_knowledge = self.collect_background(new_discourse, background, verbose)
            else:
                background_knowledge = None
            return new_discourse.resolve(lambda x: inference_check(x, background_knowledge, verbose), verbose)
        except IndexError:
            print("Input sentences only!")
        except ValueError as e:
            print("Error: %s" % e)

    def inference_test(self, cases, bk, verbose=True):
        """Run ``interpret`` on each case and report admissible/inadmissible readings.

        :param cases: iterable of ``(number, discourse, expression, judgement)``;
            ``judgement`` is an INFERROR code, a list of codes, or a false
            value when no inadmissible reading is expected.
        :param bk: background knowledge passed on to ``interpret``.
        :param verbose: also print the expected and returned error for each
            inadmissible reading.
        """
        for number, discourse, expression, judgement in cases:
            print("\n%s. %s %s" % (number, discourse, expression))
            outcome = self.interpret(discourse, expression, bk, verbose=True, test=True)
            if outcome is None:
                # interpret() has already printed why it gave up on this case.
                continue
            interpretations, errors = outcome
            for interpretation in interpretations:
                print("\nAdmissible interpretation: ", interpretation)
            if not judgement:
                print("\nNo inadmissible readings")
                continue
            if not isinstance(judgement, list):
                judgement = [judgement]
            if len(judgement) != len(errors):
                print("\n#!!!#: !Unexpected error! #!!!#")
                continue
            for index, error in enumerate(errors):
                error_message = Tester.INFERROR.get(judgement[index], False)
                if verbose:
                    print("\nexpected error:%s" % error_message)
                    print("\nreturned error:%s" % error[1])
                if type(error[1]) is error_message:
                    print("\nInadmissible reading %s returns as expected:\n\t%s" % (error[0], error_message.__name__))
                else:
                    print("\n#!!!#: Inadmissible reading %s returned with unexpected error: %s" % (error[0], error[1]))