Newer
Older
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
"body .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
"body .sc { color: #BA2121 } /* Literal.String.Char */\n",
"body .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
"body .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
"body .s2 { color: #BA2121 } /* Literal.String.Double */\n",
"body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
"body .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
"body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
"body .sx { color: #008000 } /* Literal.String.Other */\n",
"body .sr { color: #BB6688 } /* Literal.String.Regex */\n",
"body .s1 { color: #BA2121 } /* Literal.String.Single */\n",
"body .ss { color: #19177C } /* Literal.String.Symbol */\n",
"body .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
"body .fm { color: #0000FF } /* Name.Function.Magic */\n",
"body .vc { color: #19177C } /* Name.Variable.Class */\n",
"body .vg { color: #19177C } /* Name.Variable.Global */\n",
"body .vi { color: #19177C } /* Name.Variable.Instance */\n",
"body .vm { color: #19177C } /* Name.Variable.Magic */\n",
"body .il { color: #666666 } /* Literal.Number.Integer.Long */\n",
"\n",
" </style>\n",
"</head>\n",
"<body>\n",
"<h2></h2>\n",
"\n",
"<div class=\"highlight\"><pre><span></span><span class=\"k\">class</span> <span class=\"nc\">OnlineDFSAgent</span><span class=\"p\">:</span>\n",
"\n",
" <span class=\"sd\">"""[Figure 4.21] The abstract class for an OnlineDFSAgent. Override</span>\n",
"<span class=\"sd\"> update_state method to convert percept to state. While initializing</span>\n",
"<span class=\"sd\"> the subclass a problem needs to be provided which is an instance of</span>\n",
"<span class=\"sd\"> a subclass of the Problem class."""</span>\n",
"\n",
" <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">problem</span><span class=\"p\">):</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span> <span class=\"o\">=</span> <span class=\"n\">problem</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span> <span class=\"o\">=</span> <span class=\"nb\">dict</span><span class=\"p\">()</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span> <span class=\"o\">=</span> <span class=\"nb\">dict</span><span class=\"p\">()</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span> <span class=\"o\">=</span> <span class=\"p\">{}</span>\n",
"\n",
" <span class=\"k\">def</span> <span class=\"fm\">__call__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">percept</span><span class=\"p\">):</span>\n",
" <span class=\"n\">s1</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">update_state</span><span class=\"p\">(</span><span class=\"n\">percept</span><span class=\"p\">)</span>\n",
" <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">goal_test</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">):</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
" <span class=\"k\">else</span><span class=\"p\">:</span>\n",
" <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"o\">.</span><span class=\"n\">keys</span><span class=\"p\">():</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">actions</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
" <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"ow\">is</span> <span class=\"ow\">not</span> <span class=\"bp\">None</span><span class=\"p\">:</span>\n",
" <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"o\">!=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"p\">[(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span><span class=\"p\">)]:</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"p\">[(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span><span class=\"p\">)]</span> <span class=\"o\">=</span> <span class=\"n\">s1</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span><span class=\"o\">.</span><span class=\"n\">insert</span><span class=\"p\">(</span><span class=\"mi\">0</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">)</span>\n",
" <span class=\"k\">if</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">])</span> <span class=\"o\">==</span> <span class=\"mi\">0</span><span class=\"p\">:</span>\n",
" <span class=\"k\">if</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">])</span> <span class=\"o\">==</span> <span class=\"mi\">0</span><span class=\"p\">:</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
" <span class=\"k\">else</span><span class=\"p\">:</span>\n",
" <span class=\"c1\"># else a <- an action b such that result[s', b] = POP(unbacktracked[s'])</span>\n",
" <span class=\"n\">unbacktracked_pop</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">unbacktracked</span><span class=\"o\">.</span><span class=\"n\">pop</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
" <span class=\"k\">for</span> <span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">)</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"o\">.</span><span class=\"n\">keys</span><span class=\"p\">():</span>\n",
" <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">result</span><span class=\"p\">[(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">)]</span> <span class=\"o\">==</span> <span class=\"n\">unbacktracked_pop</span><span class=\"p\">:</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"n\">b</span>\n",
" <span class=\"k\">break</span>\n",
" <span class=\"k\">else</span><span class=\"p\">:</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">untried</span><span class=\"o\">.</span><span class=\"n\">pop</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"n\">s1</span>\n",
" <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span>\n",
"\n",
" <span class=\"k\">def</span> <span class=\"nf\">update_state</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">percept</span><span class=\"p\">):</span>\n",
" <span class=\"sd\">"""To be overridden in most cases. The default case</span>\n",
"<span class=\"sd\"> assumes the percept to be of type state."""</span>\n",
" <span class=\"k\">return</span> <span class=\"n\">percept</span>\n",
"</pre></div>\n",
"</body>\n",
"</html>\n"
],
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It maintains two dictionaries `untried` and `unbacktracked`.\n",
"`untried` contains nodes that have not been visited yet.\n",
"`unbacktracked` contains the sequence of nodes that the agent has visited so it can backtrack to it later, if required.\n",
"`s` and `a` store the state and the action respectively and `result` stores the final path or solution of the problem.\n",
"<br>\n",
"Let's look at another online search algorithm."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LRTA* AGENT\n",
"We can infer now that hill-climbing is an online search algorithm, but it is not very useful natively because for complicated search spaces, it might converge to the local minima and indefinitely stay there.\n",
"In such a case, we can choose to randomly restart it a few times with different starting conditions and return the result with the lowest total cost.\n",
"Sometimes, it is better to use random walks instead of random restarts depending on the problem, but progress can still be very slow.\n",
"A better improvement would be to give hill-climbing a memory element.\n",
"We store the current best heuristic estimate and it is updated as the agent gains experience in the state space.\n",
"The estimated optimal cost is made more and more accurate as time passes and each time the the local minima is \"flattened out\" until we escape it.\n",
"<br>\n",
"This learning scheme is a simple improvement upon traditional hill-climbing and is called _learning real-time A*_ or __LRTA*__.\n",
"Similar to _Online DFS-Agent_, it builds a map of the environment and chooses the best possible move according to its current heuristic estimates.\n",
"<br>\n",
"Actions that haven't been tried yet are assumed to lead immediately to the goal with the least possible cost.\n",
"This is called __optimism under uncertainty__ and encourages the agent to explore new promising paths.\n",
"This algorithm might not terminate if the state space is infinite, unlike A* search.\n",
"<br>\n",
"Let's have a look at the `LRTAStarAgent` class."
]
},
{
"cell_type": "code",
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146
6147
6148
6149
6150
6151
6152
6153
6154
6155
6156
6157
6158
6159
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195
6196
6197
6198
6199
6200
6201
6202
6203
6204
6205
6206
6207
6208
6209
6210
6211
6212
6213
6214
6215
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\"\n",
" \"http://www.w3.org/TR/html4/strict.dtd\">\n",
"\n",
"<html>\n",
"<head>\n",
" <title></title>\n",
" <meta http-equiv=\"content-type\" content=\"text/html; charset=None\">\n",
" <style type=\"text/css\">\n",
"td.linenos { background-color: #f0f0f0; padding-right: 10px; }\n",
"span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }\n",
"pre { line-height: 125%; }\n",
"body .hll { background-color: #ffffcc }\n",
"body { background: #f8f8f8; }\n",
"body .c { color: #408080; font-style: italic } /* Comment */\n",
"body .err { border: 1px solid #FF0000 } /* Error */\n",
"body .k { color: #008000; font-weight: bold } /* Keyword */\n",
"body .o { color: #666666 } /* Operator */\n",
"body .ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n",
"body .cm { color: #408080; font-style: italic } /* Comment.Multiline */\n",
"body .cp { color: #BC7A00 } /* Comment.Preproc */\n",
"body .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n",
"body .c1 { color: #408080; font-style: italic } /* Comment.Single */\n",
"body .cs { color: #408080; font-style: italic } /* Comment.Special */\n",
"body .gd { color: #A00000 } /* Generic.Deleted */\n",
"body .ge { font-style: italic } /* Generic.Emph */\n",
"body .gr { color: #FF0000 } /* Generic.Error */\n",
"body .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
"body .gi { color: #00A000 } /* Generic.Inserted */\n",
"body .go { color: #888888 } /* Generic.Output */\n",
"body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
"body .gs { font-weight: bold } /* Generic.Strong */\n",
"body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
"body .gt { color: #0044DD } /* Generic.Traceback */\n",
"body .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
"body .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
"body .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
"body .kp { color: #008000 } /* Keyword.Pseudo */\n",
"body .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
"body .kt { color: #B00040 } /* Keyword.Type */\n",
"body .m { color: #666666 } /* Literal.Number */\n",
"body .s { color: #BA2121 } /* Literal.String */\n",
"body .na { color: #7D9029 } /* Name.Attribute */\n",
"body .nb { color: #008000 } /* Name.Builtin */\n",
"body .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
"body .no { color: #880000 } /* Name.Constant */\n",
"body .nd { color: #AA22FF } /* Name.Decorator */\n",
"body .ni { color: #999999; font-weight: bold } /* Name.Entity */\n",
"body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n",
"body .nf { color: #0000FF } /* Name.Function */\n",
"body .nl { color: #A0A000 } /* Name.Label */\n",
"body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
"body .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
"body .nv { color: #19177C } /* Name.Variable */\n",
"body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
"body .w { color: #bbbbbb } /* Text.Whitespace */\n",
"body .mb { color: #666666 } /* Literal.Number.Bin */\n",
"body .mf { color: #666666 } /* Literal.Number.Float */\n",
"body .mh { color: #666666 } /* Literal.Number.Hex */\n",
"body .mi { color: #666666 } /* Literal.Number.Integer */\n",
"body .mo { color: #666666 } /* Literal.Number.Oct */\n",
"body .sa { color: #BA2121 } /* Literal.String.Affix */\n",
"body .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
"body .sc { color: #BA2121 } /* Literal.String.Char */\n",
"body .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
"body .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
"body .s2 { color: #BA2121 } /* Literal.String.Double */\n",
"body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
"body .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
"body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
"body .sx { color: #008000 } /* Literal.String.Other */\n",
"body .sr { color: #BB6688 } /* Literal.String.Regex */\n",
"body .s1 { color: #BA2121 } /* Literal.String.Single */\n",
"body .ss { color: #19177C } /* Literal.String.Symbol */\n",
"body .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
"body .fm { color: #0000FF } /* Name.Function.Magic */\n",
"body .vc { color: #19177C } /* Name.Variable.Class */\n",
"body .vg { color: #19177C } /* Name.Variable.Global */\n",
"body .vi { color: #19177C } /* Name.Variable.Instance */\n",
"body .vm { color: #19177C } /* Name.Variable.Magic */\n",
"body .il { color: #666666 } /* Literal.Number.Integer.Long */\n",
"\n",
" </style>\n",
"</head>\n",
"<body>\n",
"<h2></h2>\n",
"\n",
"<div class=\"highlight\"><pre><span></span><span class=\"k\">class</span> <span class=\"nc\">LRTAStarAgent</span><span class=\"p\">:</span>\n",
6218
6219
6220
6221
6222
6223
6224
6225
6226
6227
6228
6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242
6243
6244
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
6258
6259
6260
6261
6262
6263
6264
6265
" <span class=\"sd\">""" [Figure 4.24]</span>\n",
"<span class=\"sd\"> Abstract class for LRTA*-Agent. A problem needs to be</span>\n",
"<span class=\"sd\"> provided which is an instance of a subclass of Problem Class.</span>\n",
"\n",
"<span class=\"sd\"> Takes a OnlineSearchProblem [Figure 4.23] as a problem.</span>\n",
"<span class=\"sd\"> """</span>\n",
"\n",
" <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">problem</span><span class=\"p\">):</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span> <span class=\"o\">=</span> <span class=\"n\">problem</span>\n",
" <span class=\"c1\"># self.result = {} # no need as we are using problem.result</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span> <span class=\"o\">=</span> <span class=\"p\">{}</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
"\n",
" <span class=\"k\">def</span> <span class=\"fm\">__call__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">):</span> <span class=\"c1\"># as of now s1 is a state rather than a percept</span>\n",
" <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">goal_test</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">):</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"bp\">None</span>\n",
" <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span>\n",
" <span class=\"k\">else</span><span class=\"p\">:</span>\n",
" <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">:</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">h</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
" <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"ow\">is</span> <span class=\"ow\">not</span> <span class=\"bp\">None</span><span class=\"p\">:</span>\n",
" <span class=\"c1\"># self.result[(self.s, self.a)] = s1 # no need as we are using problem.output</span>\n",
"\n",
" <span class=\"c1\"># minimum cost for action b in problem.actions(s)</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">[</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"nb\">min</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">LRTA_cost</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">output</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">),</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">)</span> <span class=\"k\">for</span> <span class=\"n\">b</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">actions</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span><span class=\"p\">))</span>\n",
"\n",
" <span class=\"c1\"># an action b in problem.actions(s1) that minimizes costs</span>\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span> <span class=\"o\">=</span> <span class=\"n\">argmin</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">actions</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">),</span>\n",
" <span class=\"n\">key</span><span class=\"o\">=</span><span class=\"k\">lambda</span> <span class=\"n\">b</span><span class=\"p\">:</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">LRTA_cost</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">,</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">output</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">,</span> <span class=\"n\">b</span><span class=\"p\">),</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">))</span>\n",
"\n",
" <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">s</span> <span class=\"o\">=</span> <span class=\"n\">s1</span>\n",
" <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">a</span>\n",
"\n",
" <span class=\"k\">def</span> <span class=\"nf\">LRTA_cost</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">,</span> <span class=\"n\">H</span><span class=\"p\">):</span>\n",
" <span class=\"sd\">"""Returns cost to move from state 's' to state 's1' plus</span>\n",
"<span class=\"sd\"> estimated cost to get to goal from s1."""</span>\n",
" <span class=\"k\">print</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">)</span>\n",
" <span class=\"k\">if</span> <span class=\"n\">s1</span> <span class=\"ow\">is</span> <span class=\"bp\">None</span><span class=\"p\">:</span>\n",
" <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">h</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">)</span>\n",
" <span class=\"k\">else</span><span class=\"p\">:</span>\n",
" <span class=\"c1\"># sometimes we need to get H[s1] which we haven't yet added to H</span>\n",
" <span class=\"c1\"># to replace this try, except: we can initialize H with values from problem.h</span>\n",
" <span class=\"k\">try</span><span class=\"p\">:</span>\n",
" <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">c</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">)</span> <span class=\"o\">+</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">H</span><span class=\"p\">[</span><span class=\"n\">s1</span><span class=\"p\">]</span>\n",
" <span class=\"k\">except</span><span class=\"p\">:</span>\n",
" <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">c</span><span class=\"p\">(</span><span class=\"n\">s</span><span class=\"p\">,</span> <span class=\"n\">a</span><span class=\"p\">,</span> <span class=\"n\">s1</span><span class=\"p\">)</span> <span class=\"o\">+</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">problem</span><span class=\"o\">.</span><span class=\"n\">h</span><span class=\"p\">(</span><span class=\"n\">s1</span><span class=\"p\">)</span>\n",
"</pre></div>\n",
"</body>\n",
"</html>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
6279
6280
6281
6282
6283
6284
6285
6286
6287
6288
6289
6290
6291
6292
6293
6294
6295
6296
6297
6298
6299
6300
"psource(LRTAStarAgent)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`H` stores the heuristic cost of the paths the agent may travel to.\n",
"<br>\n",
"`s` and `a` store the state and the action respectively.\n",
"<br>\n",
"`problem` stores the problem definition and the current map of the environment is stored in `problem.result`.\n",
"<br>\n",
"The `LRTA_cost` method computes the cost of a new path given the current state `s`, the action `a`, the next state `s1` and the estimated cost to get from `s` to `s1` is extracted from `H`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's use `LRTAStarAgent` to solve a simple problem.\n",
"We'll define a new `LRTA_problem` instance based on our `one_dim_state_space`."
]
},
{
"cell_type": "code",
6305
6306
6307
6308
6309
6310
6311
6312
6313
6314
6315
6316
6317
6318
6319
6320
6321
6322
6323
6324
6325
6326
6327
6328
6329
6330
6331
6332
6333
6334
6335
6336
6337
6338
6339
6340
6341
6342
6343
6344
6345
6346
6347
6348
6349
6350
6351
6352
6353
6354
6355
6356
6357
6358
6359
6360
6361
6362
6363
6364
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<search.Graph at 0x1eddaa35ef0>"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"one_dim_state_space"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's define an instance of `OnlineSearchProblem`."
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"LRTA_problem = OnlineSearchProblem('State_3', 'State_5', one_dim_state_space)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we initialize a `LRTAStarAgent` object for the problem we just defined."
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"lrta_agent = LRTAStarAgent(LRTA_problem)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We'll pass the percepts `[State_3, State_4, State_3, State_4, State_5]` one-by-one to our agent to see what action it comes up with at each timestep."
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"State_3 Right State_4\n",
"State_3 Left State_2\n"
},
{
"data": {
"text/plain": [
"'Right'"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
"cell_type": "code",
"execution_count": 87,
6394
6395
6396
6397
6398
6399
6400
6401
6402
6403
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"State_3 Right State_4\n",
"State_3 Left State_2\n",
"State_4 Right State_5\n",
"State_4 Left State_3\n"
]
},
{
"data": {
"text/plain": [
"'Left'"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
]
},
{
"cell_type": "code",
6422
6423
6424
6425
6426
6427
6428
6429
6430
6431
6432
6433
6434
6435
6436
6437
6438
6439
6440
6441
6442
6443
6444
6445
"execution_count": 88,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"State_4 Right State_5\n",
"State_4 Left State_3\n",
"State_3 Right State_4\n",
"State_3 Left State_2\n"
]
},
{
"data": {
"text/plain": [
"'Right'"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
]
},
{
"cell_type": "code",
{
"name": "stdout",
"output_type": "stream",
"text": [
"State_3 Right State_4\n",
"State_3 Left State_2\n",
"State_4 Right State_5\n",
"State_4 Left State_3\n"
]
},
{
"data": {
"text/plain": [
6477
6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496
6497
6498
6499
6500
"lrta_agent('State_4')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you manually try to see what the optimal action should be at each step, the outputs of the `lrta_agent` will start to make sense if it doesn't already."
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"lrta_agent('State_5')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There is no possible action for this state."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<br>\n",
"This concludes the notebook.\n",
"Hope you learned something new!"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
6530
6531
6532
6533
6534
6535
6536
6537
6538
6539
6540
6541
6542
6543
6544
6545
6546
6547
6548
6549
6550
6551
6552
6553
6554
6555
6556
6557
6558
6559
6560
6561
6562
},
"widgets": {
"state": {
"1516e2501ddd4a2e8e3250bffc0164db": {
"views": [
{
"cell_index": 59
}
]
},
"17be64c89a9a4a43b3272cb018df0970": {
"views": [
{
"cell_index": 59
}
]
},
"ac05040009a340b0af81b0ee69161fbc": {
"views": [
{
"cell_index": 59
}
]
},
"d9735ffe77c24f13ae4ad3620ce84334": {
"views": [
{
"cell_index": 59
}
]
}
},
"version": "1.2.0"
}
},
"nbformat": 4,