knowledge.py 6,95 ko
Newer Older
"""Knowledge in learning, Chapter 19"""

from random import shuffle
from utils import powerset
from collections import defaultdict

# ______________________________________________________________________________


def current_best_learning(examples, h, examples_so_far=[]):
    """ [Figure 19.2]
    The hypothesis is a list of dictionaries, with each dictionary representing
    a disjunction."""
    if not examples:
        return h

    e = examples[0]
    if is_consistent(e, h):
        return current_best_learning(examples[1:], h, examples_so_far + [e])
    elif false_positive(e, h):
        for h2 in specializations(examples_so_far + [e], h):
            h3 = current_best_learning(examples[1:], h2, examples_so_far + [e])
            if h3 != 'FAIL':
                return h3
    elif false_negative(e, h):
        for h2 in generalizations(examples_so_far + [e], h):
            h3 = current_best_learning(examples[1:], h2, examples_so_far + [e])
            if h3 != 'FAIL':
                return h3

    return 'FAIL'


def specializations(examples_so_far, h):
    """Specialize the hypothesis by adding AND operations to the disjunctions"""
    hypotheses = []

    for i, disj in enumerate(h):
        for e in examples_so_far:
            for k, v in e.items():
                if k in disj or k == 'GOAL':
                    continue

                h2 = h[i].copy()
                h2[k] = '!' + v
                h3 = h.copy()
                h3[i] = h2
                if check_all_consistency(examples_so_far, h3):
                    hypotheses.append(h3)

    shuffle(hypotheses)
    return hypotheses


def generalizations(examples_so_far, h):
    """Generalize the hypothesis. First delete operations
    (including disjunctions) from the hypothesis. Then, add OR operations."""
    hypotheses = []

    # Delete disjunctions
    disj_powerset = powerset(range(len(h)))
    for disjs in disj_powerset:
        h2 = h.copy()
        for d in reversed(list(disjs)):
            del h2[d]

        if check_all_consistency(examples_so_far, h2):
            hypotheses += h2

    # Delete AND operations in disjunctions
    for i, disj in enumerate(h):
        a_powerset = powerset(disj.keys())
        for attrs in a_powerset:
            h2 = h[i].copy()
            for a in attrs:
                del h2[a]

            if check_all_consistency(examples_so_far, [h2]):
                h3 = h.copy()
                h3[i] = h2.copy()
                hypotheses += h3

    # Add OR operations
    if hypotheses == [] or hypotheses == [{}]:
        hypotheses = add_or(examples_so_far, h)
    else:
        hypotheses.extend(add_or(examples_so_far, h))

    shuffle(hypotheses)
    return hypotheses


def add_or(examples_so_far, h):
    """Adds an OR operation to the hypothesis. The AND operations in the disjunction
    are generated by the last example (which is the problematic one)."""
    ors = []
    e = examples_so_far[-1]

    attrs = {k: v for k, v in e.items() if k != 'GOAL'}
    a_powerset = powerset(attrs.keys())

    for c in a_powerset:
        h2 = {}
        for k in c:
            h2[k] = attrs[k]

        if check_negative_consistency(examples_so_far, h2):
            h3 = h.copy()
            h3.append(h2)
            ors.append(h3)

    return ors

# ______________________________________________________________________________


def version_space_learning(examples):
    """ [Figure 19.3]
    The version space is a list of hypotheses, which in turn are a list
    of dictionaries/disjunctions."""
    V = all_hypotheses(examples)
    for e in examples:
        if V:
            V = version_space_update(V, e)

    return V


def version_space_update(V, e):
    return [h for h in V if is_consistent(e, h)]


def all_hypotheses(examples):
    """Builds a list of all the possible hypotheses"""
    values = values_table(examples)
    h_powerset = powerset(values.keys())
    hypotheses = []
    for s in h_powerset:
        hypotheses.extend(build_attr_combinations(s, values))

    hypotheses.extend(build_h_combinations(hypotheses))

    return hypotheses


def values_table(examples):
    """Builds a table with all the possible values for each attribute.
    Returns a dictionary with keys the attribute names and values a list
    with the possible values for the corresponding attribute."""
    values = defaultdict(lambda: [])
    for e in examples:
        for k, v in e.items():
            if k == 'GOAL':
                continue

            mod = '!'
            if e['GOAL']:
                mod = ''

            if mod + v not in values[k]:
                values[k].append(mod + v)

    values = dict(values)
    return values


def build_attr_combinations(s, values):
    """Given a set of attributes, builds all the combinations of values.
    If the set holds more than one attribute, recursively builds the
    combinations."""
    if len(s) == 1:
        # s holds just one attribute, return its list of values
        k = values[s[0]]
        h = [[{s[0]: v}] for v in values[s[0]]]
        return h

    h = []
    for i, a in enumerate(s):
        rest = build_attr_combinations(s[i+1:], values)
        for v in values[a]:
            o = {a: v}
            for r in rest:
                t = o.copy()
                for d in r:
                    t.update(d)
                h.append([t])

    return h


def build_h_combinations(hypotheses):
    """Given a set of hypotheses, builds and returns all the combinations of the
    hypotheses."""
    h = []
    h_powerset = powerset(range(len(hypotheses)))

    for s in h_powerset:
        t = []
        for i in s:
            t.extend(hypotheses[i])
        h.append(t)

    return h

# ______________________________________________________________________________


def check_all_consistency(examples, h):
    """Check for the consistency of all examples under h"""
    for e in examples:
        if not is_consistent(e, h):
            return False

    return True


def check_negative_consistency(examples, h):
    """Check if the negative examples are consistent under h"""
    for e in examples:
        if e['GOAL']:
            continue

        if not is_consistent(e, [h]):
            return False

    return True


def disjunction_value(e, d):
    """The value of example e under disjunction d"""
    for k, v in d.items():
        if v[0] == '!':
            # v is a NOT expression
            # e[k], thus, should not be equal to v
            if e[k] == v[1:]:
                return False
        elif e[k] != v:
            return False

    return True


def guess_value(e, h):
    """Guess value of example e under hypothesis h"""
    for d in h:
        if disjunction_value(e, d):
            return True

    return False


def is_consistent(e, h):
    return e["GOAL"] == guess_value(e, h)


def false_positive(e, h):
    if e["GOAL"] == False:
        if guess_value(e, h):
            return True

    return False


def false_negative(e, h):
    if e["GOAL"] == True:
        if not guess_value(e, h):
            return True

    return False