"""Knowledge in learning, Chapter 19""" from random import shuffle from utils import powerset from collections import defaultdict from itertools import combinations # ______________________________________________________________________________ def current_best_learning(examples, h, examples_so_far=[]): """ [Figure 19.2] The hypothesis is a list of dictionaries, with each dictionary representing a disjunction.""" if not examples: return h e = examples[0] if is_consistent(e, h): return current_best_learning(examples[1:], h, examples_so_far + [e]) elif false_positive(e, h): for h2 in specializations(examples_so_far + [e], h): h3 = current_best_learning(examples[1:], h2, examples_so_far + [e]) if h3 != 'FAIL': return h3 elif false_negative(e, h): for h2 in generalizations(examples_so_far + [e], h): h3 = current_best_learning(examples[1:], h2, examples_so_far + [e]) if h3 != 'FAIL': return h3 return 'FAIL' def specializations(examples_so_far, h): """Specialize the hypothesis by adding AND operations to the disjunctions""" hypotheses = [] for i, disj in enumerate(h): for e in examples_so_far: for k, v in e.items(): if k in disj or k == 'GOAL': continue h2 = h[i].copy() h2[k] = '!' + v h3 = h.copy() h3[i] = h2 if check_all_consistency(examples_so_far, h3): hypotheses.append(h3) shuffle(hypotheses) return hypotheses def generalizations(examples_so_far, h): """Generalize the hypothesis. First delete operations (including disjunctions) from the hypothesis. Then, add OR operations.""" hypotheses = [] # Delete disjunctions disj_powerset = powerset(range(len(h))) for disjs in disj_powerset: h2 = h.copy() for d in reversed(list(disjs)): del h2[d] if check_all_consistency(examples_so_far, h2): hypotheses += h2 # Delete AND operations in disjunctions for i, disj in enumerate(h): a_powerset = powerset(disj.keys()) for attrs in a_powerset: h2 = h[i].copy() for a in attrs: del h2[a] if check_all_consistency(examples_so_far, [h2]): h3 = h.copy() h3[i] = h2.copy() hypotheses += h3 # Add OR operations if hypotheses == [] or hypotheses == [{}]: hypotheses = add_or(examples_so_far, h) else: hypotheses.extend(add_or(examples_so_far, h)) shuffle(hypotheses) return hypotheses def add_or(examples_so_far, h): """Adds an OR operation to the hypothesis. The AND operations in the disjunction are generated by the last example (which is the problematic one).""" ors = [] e = examples_so_far[-1] attrs = {k: v for k, v in e.items() if k != 'GOAL'} a_powerset = powerset(attrs.keys()) for c in a_powerset: h2 = {} for k in c: h2[k] = attrs[k] if check_negative_consistency(examples_so_far, h2): h3 = h.copy() h3.append(h2) ors.append(h3) return ors # ______________________________________________________________________________ def version_space_learning(examples): """ [Figure 19.3] The version space is a list of hypotheses, which in turn are a list of dictionaries/disjunctions.""" V = all_hypotheses(examples) for e in examples: if V: V = version_space_update(V, e) return V def version_space_update(V, e): return [h for h in V if is_consistent(e, h)] def all_hypotheses(examples): """Builds a list of all the possible hypotheses""" values = values_table(examples) h_powerset = powerset(values.keys()) hypotheses = [] for s in h_powerset: hypotheses.extend(build_attr_combinations(s, values)) hypotheses.extend(build_h_combinations(hypotheses)) return hypotheses def values_table(examples): """Builds a table with all the possible values for each attribute. Returns a dictionary with keys the attribute names and values a list with the possible values for the corresponding attribute.""" values = defaultdict(lambda: []) for e in examples: for k, v in e.items(): if k == 'GOAL': continue mod = '!' if e['GOAL']: mod = '' if mod + v not in values[k]: values[k].append(mod + v) values = dict(values) return values def build_attr_combinations(s, values): """Given a set of attributes, builds all the combinations of values. If the set holds more than one attribute, recursively builds the combinations.""" if len(s) == 1: # s holds just one attribute, return its list of values k = values[s[0]] h = [[{s[0]: v}] for v in values[s[0]]] return h h = [] for i, a in enumerate(s): rest = build_attr_combinations(s[i+1:], values) for v in values[a]: o = {a: v} for r in rest: t = o.copy() for d in r: t.update(d) h.append([t]) return h def build_h_combinations(hypotheses): """Given a set of hypotheses, builds and returns all the combinations of the hypotheses.""" h = [] h_powerset = powerset(range(len(hypotheses))) for s in h_powerset: t = [] for i in s: t.extend(hypotheses[i]) h.append(t) return h # ______________________________________________________________________________ def minimal_consistent_det(E, A): n = len(A) for i in range(n + 1): for A_i in combinations(A, i): if consistent_det(A_i, E): return set(A_i) def consistent_det(A, E): H = {} for e in E: attr_values = tuple(e[attr] for attr in A) if attr_values in H and H[attr_values] != e['GOAL']: return False H[attr_values] = e['GOAL'] return True # ______________________________________________________________________________ def check_all_consistency(examples, h): """Check for the consistency of all examples under h""" for e in examples: if not is_consistent(e, h): return False return True def check_negative_consistency(examples, h): """Check if the negative examples are consistent under h""" for e in examples: if e['GOAL']: continue if not is_consistent(e, [h]): return False return True def disjunction_value(e, d): """The value of example e under disjunction d""" for k, v in d.items(): if v[0] == '!': # v is a NOT expression # e[k], thus, should not be equal to v if e[k] == v[1:]: return False elif e[k] != v: return False return True def guess_value(e, h): """Guess value of example e under hypothesis h""" for d in h: if disjunction_value(e, d): return True return False def is_consistent(e, h): return e["GOAL"] == guess_value(e, h) def false_positive(e, h): if e["GOAL"] == False: if guess_value(e, h): return True return False def false_negative(e, h): if e["GOAL"] == True: if not guess_value(e, h): return True return False