<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://en.formulasearchengine.com/index.php?action=history&amp;feed=atom&amp;title=Layer_cake_representation</id>
	<title>Layer cake representation - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://en.formulasearchengine.com/index.php?action=history&amp;feed=atom&amp;title=Layer_cake_representation"/>
	<link rel="alternate" type="text/html" href="https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;action=history"/>
	<updated>2026-05-29T23:56:33Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.0-wmf.28</generator>
	<entry>
		<id>https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;diff=253835&amp;oldid=prev</id>
		<title>207.38.151.173 at 06:15, 12 February 2014</title>
		<link rel="alternate" type="text/html" href="https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;diff=253835&amp;oldid=prev"/>
		<updated>2014-02-12T06:15:11Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;a href=&quot;https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;amp;diff=253835&amp;amp;oldid=16826&quot;&gt;Show changes&lt;/a&gt;</summary>
		<author><name>207.38.151.173</name></author>
	</entry>
	<entry>
		<id>https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;diff=16826&amp;oldid=prev</id>
		<title>en&gt;Mark viking: Added wl</title>
		<link rel="alternate" type="text/html" href="https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;diff=16826&amp;oldid=prev"/>
		<updated>2013-07-12T09:25:06Z</updated>

		<summary type="html">&lt;p&gt;Added wl&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 11:25, 12 July 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot;&gt;Line 1:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 1:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;The title &lt;/del&gt;of the &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;writer is Nestor&lt;/del&gt;. &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;She functions &lt;/del&gt;as a &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;financial officer &lt;/del&gt;and &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;she &lt;/del&gt;will &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;not change &lt;/del&gt;it &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;whenever soon&lt;/del&gt;. &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Years ago we moved &lt;/del&gt;to &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Kansas&lt;/del&gt;. &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;To maintain birds is one &lt;/del&gt;of the &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;issues he loves &lt;/del&gt;most.&amp;lt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;br&lt;/del&gt;&amp;gt;&amp;lt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;br&lt;/del&gt;&amp;gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;my website :: [&lt;/del&gt;http://&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Pchelpnow&lt;/del&gt;.&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;biz&lt;/del&gt;/&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;ActivityFeed&lt;/del&gt;/&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;MyProfile&lt;/del&gt;/&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;tabid&lt;/del&gt;/&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;60&lt;/del&gt;/&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;userId&lt;/del&gt;/&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;48445&lt;/del&gt;/&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Default&lt;/del&gt;.&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;aspx Pchelpnow&lt;/del&gt;.&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;biz&lt;/del&gt;]&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;{{About|the machine learning algorithm|the village in the Anand District &lt;/ins&gt;of &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Gujarat in India|Sarsa}}&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;{{Wikiversity|SARSA}}&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&#039;&#039;&#039;SARSA&#039;&#039;&#039; (&#039;&#039;&#039;State-Action-Reward-State-Action&#039;&#039;&#039;) is an [[algorithm]] for learning a [[Markov decision process]] policy, used in &lt;/ins&gt;the &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;[[reinforcement learning]] area of [[machine learning]].  It was introduced in a technical note &amp;lt;ref&amp;gt;[http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.17&lt;/ins&gt;.&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;2539&amp;amp;rep=rep1&amp;amp;type=pdf Online Q-Learning using Connectionist Systems&quot; by Rummery &amp;amp; Niranjan (1994)]&amp;lt;/ref&amp;gt; where the alternative name SARSA was only mentioned &lt;/ins&gt;as a &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;footnote.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;This name simply reflects the fact that the main function for updating the Q-value depends on the current state of the agent &quot;&#039;&#039;&#039;S&#039;&#039;&#039;&amp;lt;sub&amp;gt;1&amp;lt;/sub&amp;gt;&quot;, the action the agent chooses &quot;&#039;&#039;&#039;A&#039;&#039;&#039;&amp;lt;sub&amp;gt;1&amp;lt;/sub&amp;gt;&quot;, the reward &quot;&#039;&#039;&#039;R&#039;&#039;&#039;&quot; the agent gets for choosing this action, the state &quot;&#039;&#039;&#039;S&#039;&#039;&#039;&amp;lt;sub&amp;gt;2&amp;lt;/sub&amp;gt;&quot; that the agent will now be in after taking that action, and finally the next action &quot;&#039;&#039;&#039;A&#039;&#039;&#039;&amp;lt;sub&amp;gt;2&amp;lt;/sub&amp;gt;&quot; the agent will choose in its new state. Taking every letter in the quintuple (s&amp;lt;sub&amp;gt;t&amp;lt;/sub&amp;gt;, a&amp;lt;sub&amp;gt;t&amp;lt;/sub&amp;gt;, r&amp;lt;sub&amp;gt;t&amp;lt;/sub&amp;gt;, s&amp;lt;sub&amp;gt;t+1&amp;lt;/sub&amp;gt;, a&amp;lt;sub&amp;gt;t+1&amp;lt;/sub&amp;gt;) yields the word &#039;&#039;SARSA&#039;&#039;.&amp;lt;ref&amp;gt;[http://webdocs.cs.ualberta.ca/~sutton/book/ebook/node64.html Reinforcement Learning: An Introduction Richard S. Sutton &lt;/ins&gt;and &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Andrew G. Barto (chapter 6.4)]&amp;lt;/ref&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;== Algorithm ==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;:&amp;lt;math&amp;gt;Q(s_t,a_t) \leftarrow Q(s_t,a_t) + \alpha [r_{t} + \gamma Q(s_{t+1}, a_{t+1})-Q(s_t,a_t)]&amp;lt;/math&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;A SARSA agent &lt;/ins&gt;will &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;interact with the environment and update the policy based on actions taken, known as an on-policy learning algorithm.  As expressed above, the Q value for a state-action is updated by an error, adjusted by the learning rate alpha. Q values represent the possible reward received in the next time step for taking action &#039;&#039;a&#039;&#039; in state &#039;&#039;s&#039;&#039;, plus the discounted future reward received from the next state-action observation. Watkin&#039;s [[Q-learning]] was created as an alternative to the existing [[temporal difference learning|temporal difference technique]] and which updates the policy based on the maximum reward of available actions.  The difference may be explained as SARSA learns the Q values associated with taking the policy &lt;/ins&gt;it &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;follows itself, while Watkin&#039;s Q-learning learns the Q values associated with taking the exploitation policy while following an exploration/exploitation policy.  For further information on the exploration/exploitation trade off, see [[reinforcement learning]]&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Some optimizations of Watkin&#039;s Q-learning may also be applied &lt;/ins&gt;to &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;SARSA, for example in the paper &quot;Fast Online Q(λ)&quot; (Wiering and Schmidhuber, 1998) the small differences needed for SARSA(λ) implementations are described as they arise&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;== Influence of variables on the algorithm ==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;=== Learning rate (alpha)===&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;The learning rate determines to what extent the newly acquired information will override the old information. A factor of 0 will make the agent not learn anything, while a factor &lt;/ins&gt;of &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;1 would make the agent consider only &lt;/ins&gt;the most &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;recent information.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;=== Discount factor (gamma) ===&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;The discount factor determines the importance of future rewards. A factor of 0 will make the agent &quot;opportunistic&quot; by only considering current rewards, while a factor approaching 1 will make it strive for a long-term high reward. If the discount factor meets or exceeds 1, the &amp;lt;math&amp;gt;Q&amp;lt;/math&amp;gt; values may diverge&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;=== Initial conditions (&lt;/ins&gt;&amp;lt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;math&lt;/ins&gt;&amp;gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Q(s_0,a_0)&lt;/ins&gt;&amp;lt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;/math&amp;gt;) ===&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Since SARSA is an iterative algorithm, it implicitly assumes an initial condition before the first update occur. A high (infinite) initial value, also known as &quot;optimistic initial conditions&quot;,&amp;lt;ref&lt;/ins&gt;&amp;gt;http://&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;webdocs&lt;/ins&gt;.&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;cs.ualberta.ca&lt;/ins&gt;/&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;~sutton&lt;/ins&gt;/&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;book&lt;/ins&gt;/&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;ebook&lt;/ins&gt;/&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;node21.html&amp;lt;&lt;/ins&gt;/&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;ref&amp;gt; can encourage exploration: no matter what action will take place, the update rule will cause it to have lower values than the other alternative, thus increasing their choice probability. Recently, it was suggested that the first reward &amp;lt;math&amp;gt;r&amp;lt;&lt;/ins&gt;/&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;math&amp;gt; could be used to reset the initial conditions. According to this idea, the first time an action is taken the reward is used to set the value of &amp;lt;math&amp;gt;Q&amp;lt;&lt;/ins&gt;/&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;math&amp;gt;&lt;/ins&gt;. &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;This will allow immediate learning in case of fix deterministic rewards. Surprisingly, this resetting-of-initial-conditions (RIC) approach seems to be consistent with human behaviour in repeated binary choice experiments&lt;/ins&gt;.&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&amp;lt;ref&amp;gt;[http://www.ncbi.nlm.nih.gov/pubmed/22924882 The Role of First Impression in Operant Learning. Shteingart H, Neiman T, Loewenstein Y. J Exp Psychol Gen. 2013 May; 142(2):476-88. doi: 10.1037/a0029550. Epub 2012 Aug 27.]&amp;lt;/ref&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;== See also ==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [[Reinforcement learning]]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [[Temporal difference learning]]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [[Q-learning]]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;==References==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;{{reflist}}&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;[[Category:Machine learning algorithms]&lt;/ins&gt;]&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>en&gt;Mark viking</name></author>
	</entry>
	<entry>
		<id>https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;diff=253834&amp;oldid=prev</id>
		<title>en&gt;Michael Hardy at 19:17, 13 August 2012</title>
		<link rel="alternate" type="text/html" href="https://en.formulasearchengine.com/index.php?title=Layer_cake_representation&amp;diff=253834&amp;oldid=prev"/>
		<updated>2012-08-13T19:17:57Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;The title of the writer is Nestor. She functions as a financial officer and she will not change it whenever soon. Years ago we moved to Kansas. To maintain birds is one of the issues he loves most.&amp;lt;br&amp;gt;&amp;lt;br&amp;gt;my website :: [http://Pchelpnow.biz/ActivityFeed/MyProfile/tabid/60/userId/48445/Default.aspx Pchelpnow.biz]&lt;/div&gt;</summary>
		<author><name>en&gt;Michael Hardy</name></author>
	</entry>
</feed>