@@ -386,7 +386,7 @@ For the Tiger problem, we implemented this procedure as follows:
print("True state:", tiger_problem.env.state)
print("Belief:", tiger_problem.agent.cur_belief)
print("Action:", action)
- # Step 3; no transition since actions in Tiger problem
+ # Step 3; There is no state transition for the tiger domain.
# In general, the environment state can be transitioned
# using
#
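In general, the transition the comment refers to can be performed roughly as in the sketch below; it assumes pomdp_py's Environment.state_transition method and the tiger_problem/action variables from the surrounding code.

    # Sketch only: advance the true environment state with its transition
    # model. In the Tiger domain this is effectively a no-op, since actions
    # never change the hidden tiger location.
    reward = tiger_problem.env.state_transition(action, execute=True)
    # With execute=True the environment's true state is updated in place
    # and the resulting reward is returned.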
@@ -402,10 +402,9 @@ For the Tiger problem, we implemented this procedure as follows:

# Step 4
# Let's create some simulated real observation;
- # Update the belief Creating true observation for
- # sanity checking solver behavior. In general, this
- # observation should be sampled from agent's observation
- # model, as
+ # Here, we use observation based on true state for sanity
+ # checking solver behavior. In general, this observation
+ # should be sampled from agent's observation model, as
#
# real_observation = tiger_problem.agent.observation_model.sample(tiger_problem.env.state, action)
#
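The "observation based on true state" mentioned above is, in the Tiger example, typically built directly from the environment state. A rough sketch, assuming the example's TigerObservation class and the variables used in the code above:

    # Sketch: sanity-check observation constructed from the true state
    # (assumes the Tiger example defines a TigerObservation class keyed by
    # the state name, e.g. "tiger-left" / "tiger-right").
    real_observation = TigerObservation(tiger_problem.env.state.name)

    # The more general alternative, as noted in the comment, samples from
    # the agent's observation model instead:
    # real_observation = tiger_problem.agent.observation_model.sample(
    #     tiger_problem.env.state, action)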
@@ -416,6 +415,8 @@ For the Tiger problem, we implemented this procedure as follows:
print(">> Observation: %s" % real_observation)

# Step 5
+ # Update the belief. If the planner is POMCP, planner.update
+ # also automatically updates agent belief.
tiger_problem.agent.update_history(action, real_observation)
planner.update(tiger_problem.agent, action, real_observation)
if isinstance(planner, pomdp_py.POUCT):
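As a point of reference for the new comment: with POMCP the agent's particle belief is updated inside planner.update, while with POUCT and a histogram belief the update is done explicitly. A rough sketch, assuming pomdp_py's Histogram belief utilities and the surrounding variables:

    # Sketch: explicit belief update for a histogram belief (e.g. when
    # using POUCT). With POMCP, planner.update above already updates the
    # agent's particle belief, so this step is not needed.
    if isinstance(tiger_problem.agent.cur_belief, pomdp_py.Histogram):
        new_belief = pomdp_py.update_histogram_belief(
            tiger_problem.agent.cur_belief,
            action, real_observation,
            tiger_problem.agent.observation_model,
            tiger_problem.agent.transition_model)
        tiger_problem.agent.set_belief(new_belief)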