search.ipynb 662 ko
Newer Older
       "body .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
       "body .sc { color: #BA2121 } /* Literal.String.Char */\n",
       "body .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
       "body .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
       "body .s2 { color: #BA2121 } /* Literal.String.Double */\n",
       "body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
       "body .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
       "body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
       "body .sx { color: #008000 } /* Literal.String.Other */\n",
       "body .sr { color: #BB6688 } /* Literal.String.Regex */\n",
       "body .s1 { color: #BA2121 } /* Literal.String.Single */\n",
       "body .ss { color: #19177C } /* Literal.String.Symbol */\n",
       "body .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
       "body .fm { color: #0000FF } /* Name.Function.Magic */\n",
       "body .vc { color: #19177C } /* Name.Variable.Class */\n",
       "body .vg { color: #19177C } /* Name.Variable.Global */\n",
       "body .vi { color: #19177C } /* Name.Variable.Instance */\n",
       "body .vm { color: #19177C } /* Name.Variable.Magic */\n",
       "body .il { color: #666666 } /* Literal.Number.Integer.Long */\n",
       "\n",
       "  </style>\n",
       "</head>\n",
       "<body>\n",
       "<h2></h2>\n",
       "\n",
       "<div class=\"highlight\"><pre><span></span><span class=\"k\">class</span> <span class=\"nc\">OnlineDFSAgent</span><span class=\"p\">:</span>\n",
       "\n",
       "    <span class=\"sd\">&quot;&quot;&quot;[Figure 4.21] The abstract class for an OnlineDFSAgent. Override</span>\n",
       "<span class=\"sd\">    update_state method to convert percept to state. While initializing</span>\n",
       "<span class=\"sd\">    the subclass a problem needs to be provided which is an instance of</span>\n",
       "<span class=\"sd\">    a subclass of the Problem class.&quot;&quot;&quot;</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">problem</span><span class=\"p\">):</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span> <span class=\"o\">=</span> <span class=\"n\">problem</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span> <span class=\"o\">=</span> <span class=\"nb\">dict</span><span class=\"p\">()</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span> <span class=\"o\">=</span> <span class=\"nb\">dict</span><span class=\"p\">()</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span> <span class=\"o\">=</span> <span class=\"p\">{}</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__call__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">percept</span><span class=\"p\">):</span>\n",
       "        <span class=\"n\">s1</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">update_state</span><span class=\"p\">(</span><span class=\"n\">percept</span><span class=\"p\">)</span>\n",
       "        <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">goal_test</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">):</span>\n",
       "            <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
       "        <span class=\"k\">else</span><span class=\"p\">:</span>\n",
       "            <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"o\">.</span><span class=\"n\">keys</span><span class=\"p\">():</span>\n",
       "                <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">actions</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
       "            <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"ow\">is</span> <span class=\"ow\">not</span> <span class=\"bp\">None</span><span class=\"p\">:</span>\n",
       "                <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"o\">!=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"p\">[(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span><span class=\"p\">)]:</span>\n",
       "                    <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"p\">[(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span><span class=\"p\">)]</span> <span class=\"o\">=</span> <span class=\"n\">s1</span>\n",
       "                    <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span><span class=\"o\">.</span><span class=\"n\">insert</span><span class=\"p\">(</span><span class=\"mi\">0</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">)</span>\n",
       "            <span class=\"k\">if</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">])</span> <span class=\"o\">==</span> <span class=\"mi\">0</span><span class=\"p\">:</span>\n",
       "                <span class=\"k\">if</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">])</span> <span class=\"o\">==</span> <span class=\"mi\">0</span><span class=\"p\">:</span>\n",
       "                    <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
       "                <span class=\"k\">else</span><span class=\"p\">:</span>\n",
       "                    <span class=\"c1\"># else a &lt;- an action b such that result[s&#39;, b] = POP(unbacktracked[s&#39;])</span>\n",
       "                    <span class=\"n\">unbacktracked_pop</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span><span class=\"o\">.</span><span class=\"n\">pop</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
       "                    <span class=\"k\">for</span> <span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">)</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"o\">.</span><span class=\"n\">keys</span><span class=\"p\">():</span>\n",
       "                        <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"p\">[(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">)]</span> <span class=\"o\">==</span> <span class=\"n\">unbacktracked_pop</span><span class=\"p\">:</span>\n",
       "                            <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"n\">b</span>\n",
       "                            <span class=\"k\">break</span>\n",
       "            <span class=\"k\">else</span><span class=\"p\">:</span>\n",
       "                <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"o\">.</span><span class=\"n\">pop</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"n\">s1</span>\n",
       "        <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">update_state</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">percept</span><span class=\"p\">):</span>\n",
       "        <span class=\"sd\">&quot;&quot;&quot;To be overridden in most cases. The default case</span>\n",
       "<span class=\"sd\">        assumes the percept to be of type state.&quot;&quot;&quot;</span>\n",
       "        <span class=\"k\">return</span> <span class=\"n\">percept</span>\n",
       "</pre></div>\n",
       "</body>\n",
       "</html>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "psource(OnlineDFSAgent)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It maintains two dictionaries `untried` and `unbacktracked`.\n",
    "`untried` contains nodes that have not been visited yet.\n",
    "`unbacktracked` contains the sequence of nodes that the agent has visited so it can backtrack to it later, if required.\n",
    "`s` and `a` store the state and the action respectively and `result` stores the final path or solution of the problem.\n",
    "<br>\n",
    "Let's look at another online search algorithm."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## LRTA* AGENT\n",
    "We can infer now that hill-climbing is an online search algorithm, but it is not very useful natively because for complicated search spaces, it might converge to the local minima and indefinitely stay there.\n",
    "In such a case, we can choose to randomly restart it a few times with different starting conditions and return the result with the lowest total cost.\n",
    "Sometimes, it is better to use random walks instead of random restarts depending on the problem, but progress can still be very slow.\n",
    "A better improvement would be to give hill-climbing a memory element.\n",
    "We store the current best heuristic estimate and it is updated as the agent gains experience in the state space.\n",
    "The estimated optimal cost is made more and more accurate as time passes and each time the the local minima is \"flattened out\" until we escape it.\n",
    "<br>\n",
    "This learning scheme is a simple improvement upon traditional hill-climbing and is called _learning real-time A*_ or __LRTA*__.\n",
    "Similar to _Online DFS-Agent_, it builds a map of the environment and chooses the best possible move according to its current heuristic estimates.\n",
    "<br>\n",
    "Actions that haven't been tried yet are assumed to lead immediately to the goal with the least possible cost.\n",
    "This is called __optimism under uncertainty__ and encourages the agent to explore new promising paths.\n",
    "This algorithm might not terminate if the state space is infinite, unlike A* search.\n",
    "<br>\n",
    "Let's have a look at the `LRTAStarAgent` class."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\"\n",
       "   \"http://www.w3.org/TR/html4/strict.dtd\">\n",
       "\n",
       "<html>\n",
       "<head>\n",
       "  <title></title>\n",
       "  <meta http-equiv=\"content-type\" content=\"text/html; charset=None\">\n",
       "  <style type=\"text/css\">\n",
       "td.linenos { background-color: #f0f0f0; padding-right: 10px; }\n",
       "span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }\n",
       "pre { line-height: 125%; }\n",
       "body .hll { background-color: #ffffcc }\n",
       "body  { background: #f8f8f8; }\n",
       "body .c { color: #408080; font-style: italic } /* Comment */\n",
       "body .err { border: 1px solid #FF0000 } /* Error */\n",
       "body .k { color: #008000; font-weight: bold } /* Keyword */\n",
       "body .o { color: #666666 } /* Operator */\n",
       "body .ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n",
       "body .cm { color: #408080; font-style: italic } /* Comment.Multiline */\n",
       "body .cp { color: #BC7A00 } /* Comment.Preproc */\n",
       "body .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n",
       "body .c1 { color: #408080; font-style: italic } /* Comment.Single */\n",
       "body .cs { color: #408080; font-style: italic } /* Comment.Special */\n",
       "body .gd { color: #A00000 } /* Generic.Deleted */\n",
       "body .ge { font-style: italic } /* Generic.Emph */\n",
       "body .gr { color: #FF0000 } /* Generic.Error */\n",
       "body .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
       "body .gi { color: #00A000 } /* Generic.Inserted */\n",
       "body .go { color: #888888 } /* Generic.Output */\n",
       "body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
       "body .gs { font-weight: bold } /* Generic.Strong */\n",
       "body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
       "body .gt { color: #0044DD } /* Generic.Traceback */\n",
       "body .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
       "body .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
       "body .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
       "body .kp { color: #008000 } /* Keyword.Pseudo */\n",
       "body .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
       "body .kt { color: #B00040 } /* Keyword.Type */\n",
       "body .m { color: #666666 } /* Literal.Number */\n",
       "body .s { color: #BA2121 } /* Literal.String */\n",
       "body .na { color: #7D9029 } /* Name.Attribute */\n",
       "body .nb { color: #008000 } /* Name.Builtin */\n",
       "body .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
       "body .no { color: #880000 } /* Name.Constant */\n",
       "body .nd { color: #AA22FF } /* Name.Decorator */\n",
       "body .ni { color: #999999; font-weight: bold } /* Name.Entity */\n",
       "body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n",
       "body .nf { color: #0000FF } /* Name.Function */\n",
       "body .nl { color: #A0A000 } /* Name.Label */\n",
       "body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
       "body .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
       "body .nv { color: #19177C } /* Name.Variable */\n",
       "body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
       "body .w { color: #bbbbbb } /* Text.Whitespace */\n",
       "body .mb { color: #666666 } /* Literal.Number.Bin */\n",
       "body .mf { color: #666666 } /* Literal.Number.Float */\n",
       "body .mh { color: #666666 } /* Literal.Number.Hex */\n",
       "body .mi { color: #666666 } /* Literal.Number.Integer */\n",
       "body .mo { color: #666666 } /* Literal.Number.Oct */\n",
       "body .sa { color: #BA2121 } /* Literal.String.Affix */\n",
       "body .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
       "body .sc { color: #BA2121 } /* Literal.String.Char */\n",
       "body .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
       "body .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
       "body .s2 { color: #BA2121 } /* Literal.String.Double */\n",
       "body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
       "body .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
       "body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
       "body .sx { color: #008000 } /* Literal.String.Other */\n",
       "body .sr { color: #BB6688 } /* Literal.String.Regex */\n",
       "body .s1 { color: #BA2121 } /* Literal.String.Single */\n",
       "body .ss { color: #19177C } /* Literal.String.Symbol */\n",
       "body .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
       "body .fm { color: #0000FF } /* Name.Function.Magic */\n",
       "body .vc { color: #19177C } /* Name.Variable.Class */\n",
       "body .vg { color: #19177C } /* Name.Variable.Global */\n",
       "body .vi { color: #19177C } /* Name.Variable.Instance */\n",
       "body .vm { color: #19177C } /* Name.Variable.Magic */\n",
       "body .il { color: #666666 } /* Literal.Number.Integer.Long */\n",
       "\n",
       "  </style>\n",
       "</head>\n",
       "<body>\n",
       "<h2></h2>\n",
       "\n",
       "<div class=\"highlight\"><pre><span></span><span class=\"k\">class</span> <span class=\"nc\">LRTAStarAgent</span><span class=\"p\">:</span>\n",
       "    <span class=\"sd\">&quot;&quot;&quot; [Figure 4.24]</span>\n",
       "<span class=\"sd\">    Abstract class for LRTA*-Agent. A problem needs to be</span>\n",
       "<span class=\"sd\">    provided which is an instance of a subclass of Problem Class.</span>\n",
       "\n",
       "<span class=\"sd\">    Takes a OnlineSearchProblem [Figure 4.23] as a problem.</span>\n",
       "<span class=\"sd\">    &quot;&quot;&quot;</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">problem</span><span class=\"p\">):</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span> <span class=\"o\">=</span> <span class=\"n\">problem</span>\n",
       "        <span class=\"c1\"># self.result = {}      # no need as we are using problem.result</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span> <span class=\"o\">=</span> <span class=\"p\">{}</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__call__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">):</span>     <span class=\"c1\"># as of now s1 is a state rather than a percept</span>\n",
       "        <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">goal_test</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">):</span>\n",
       "            <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
       "            <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span>\n",
       "        <span class=\"k\">else</span><span class=\"p\">:</span>\n",
       "            <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">:</span>\n",
       "                <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">h</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
       "            <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"ow\">is</span> <span class=\"ow\">not</span> <span class=\"bp\">None</span><span class=\"p\">:</span>\n",
       "                <span class=\"c1\"># self.result[(self.s, self.a)] = s1    # no need as we are using problem.output</span>\n",
       "\n",
       "                <span class=\"c1\"># minimum cost for action b in problem.actions(s)</span>\n",
       "                <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">[</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"nb\">min</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">LRTA_cost</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">output</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">),</span>\n",
       "                                     <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">)</span> <span class=\"k\">for</span> <span class=\"n\">b</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">actions</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">))</span>\n",
       "\n",
       "            <span class=\"c1\"># an action b in problem.actions(s1) that minimizes costs</span>\n",
       "            <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"n\">argmin</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">actions</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">),</span>\n",
       "                            <span class=\"n\">key</span><span class=\"o\">=</span><span class=\"k\">lambda</span> <span class=\"n\">b</span><span class=\"p\">:</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">LRTA_cost</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">output</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">),</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">))</span>\n",
       "\n",
       "            <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"n\">s1</span>\n",
       "            <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">LRTA_cost</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">,</span> <span class=\"n\">H</span><span class=\"p\">):</span>\n",
       "        <span class=\"sd\">&quot;&quot;&quot;Returns cost to move from state &#39;s&#39; to state &#39;s1&#39; plus</span>\n",
       "<span class=\"sd\">        estimated cost to get to goal from s1.&quot;&quot;&quot;</span>\n",
       "        <span class=\"k\">print</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">)</span>\n",
       "        <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"ow\">is</span> <span class=\"bp\">None</span><span class=\"p\">:</span>\n",
       "            <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">h</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">)</span>\n",
       "        <span class=\"k\">else</span><span class=\"p\">:</span>\n",
       "            <span class=\"c1\"># sometimes we need to get H[s1] which we haven&#39;t yet added to H</span>\n",
       "            <span class=\"c1\"># to replace this try, except: we can initialize H with values from problem.h</span>\n",
       "            <span class=\"k\">try</span><span class=\"p\">:</span>\n",
       "                <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">c</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">)</span> <span class=\"o\">+</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span>\n",
       "            <span class=\"k\">except</span><span class=\"p\">:</span>\n",
       "                <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">c</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">)</span> <span class=\"o\">+</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">h</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
       "</pre></div>\n",
       "</body>\n",
       "</html>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "psource(LRTAStarAgent)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`H` stores the heuristic cost of the paths the agent may travel to.\n",
    "<br>\n",
    "`s` and `a` store the state and the action respectively.\n",
    "<br>\n",
    "`problem` stores the problem definition and the current map of the environment is stored in `problem.result`.\n",
    "<br>\n",
    "The `LRTA_cost` method computes the cost of a new path given the current state `s`, the action `a`, the next state `s1` and the estimated cost to get from `s` to `s1` is extracted from `H`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's use `LRTAStarAgent` to solve a simple problem.\n",
    "We'll define a new `LRTA_problem` instance based on our `one_dim_state_space`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<search.Graph at 0x1eddaa35ef0>"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "one_dim_state_space"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's define an instance of `OnlineSearchProblem`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "LRTA_problem = OnlineSearchProblem('State_3', 'State_5', one_dim_state_space)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we initialize a `LRTAStarAgent` object for the problem we just defined."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "lrta_agent = LRTAStarAgent(LRTA_problem)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We'll pass the percepts `[State_3, State_4, State_3, State_4, State_5]` one-by-one to our agent to see what action it comes up with at each timestep."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "State_3 Right State_4\n",
      "State_3 Left State_2\n"
    },
    {
     "data": {
      "text/plain": [
       "'Right'"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    "lrta_agent('State_3')"
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "State_3 Right State_4\n",
      "State_3 Left State_2\n",
      "State_4 Right State_5\n",
      "State_4 Left State_3\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Left'"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lrta_agent('State_4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "State_4 Right State_5\n",
      "State_4 Left State_3\n",
      "State_3 Right State_4\n",
      "State_3 Left State_2\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Right'"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lrta_agent('State_3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "State_3 Right State_4\n",
      "State_3 Left State_2\n",
      "State_4 Right State_5\n",
      "State_4 Left State_3\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Right'"
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    "lrta_agent('State_4')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you manually try to see what the optimal action should be at each step, the outputs of the `lrta_agent` will start to make sense if it doesn't already."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "lrta_agent('State_5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There is no possible action for this state."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "This concludes the notebook.\n",
    "Hope you learned something new!"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  },
  "widgets": {
   "state": {
    "1516e2501ddd4a2e8e3250bffc0164db": {
     "views": [
      {
       "cell_index": 59
      }
     ]
    },
    "17be64c89a9a4a43b3272cb018df0970": {
     "views": [
      {
       "cell_index": 59
      }
     ]
    },
    "ac05040009a340b0af81b0ee69161fbc": {
     "views": [
      {
       "cell_index": 59
      }
     ]
    },
    "d9735ffe77c24f13ae4ad3620ce84334": {
     "views": [
      {
       "cell_index": 59
      }
     ]
    }
   },
   "version": "1.2.0"
 "nbformat_minor": 1