@@ -386,7 +386,7 @@ For the Tiger problem, we implemented this procedure as follows:
print("True state:", tiger_problem.env.state)
print("Belief:", tiger_problem.agent.cur_belief)
print("Action:", action)
- # Step 3; no transition since actions in Tiger problem
+ # Step 3; There is no state transition for the tiger domain.
# In general, the environment state can be transitioned
# using
#
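In general, the transition the comment refers to can be performed roughly as in the sketch below; it assumes pomdp_py's Environment.state_transition method and the tiger_problem/action variables from the surrounding code.

    # Sketch only: advance the true environment state with its transition
    # model. In the Tiger domain this is effectively a no-op, since actions
    # never change the hidden tiger location.
    reward = tiger_problem.env.state_transition(action, execute=True)
    # With execute=True the environment's true state is updated in place
    # and the resulting reward is returned.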
@@ -402,10 +402,9 @@ For the Tiger problem, we implemented this procedure as follows:

# Step 4
# Let's create some simulated real observation;
- # Update the belief Creating true observation for
- # sanity checking solver behavior. In general, this
- # observation should be sampled from agent's observation
- # model, as
+ # Here, we use observation based on true state for sanity
+ # checking solver behavior. In general, this observation
+ # should be sampled from agent's observation model, as
#
# real_observation = tiger_problem.agent.observation_model.sample(tiger_problem.env.state, action)
#
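The "observation based on true state" mentioned above is, in the Tiger example, typically built directly from the environment state. A rough sketch, assuming the example's TigerObservation class and the variables used in the code above:

    # Sketch: sanity-check observation constructed from the true state
    # (assumes the Tiger example defines a TigerObservation class keyed by
    # the state name, e.g. "tiger-left" / "tiger-right").
    real_observation = TigerObservation(tiger_problem.env.state.name)

    # The more general alternative, as noted in the comment, samples from
    # the agent's observation model instead:
    # real_observation = tiger_problem.agent.observation_model.sample(
    #     tiger_problem.env.state, action)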
@@ -416,6 +415,8 @@ For the Tiger problem, we implemented this procedure as follows:
print(">> Observation: %s" % real_observation)

# Step 5
+ # Update the belief. If the planner is POMCP, planner.update
+ # also automatically updates agent belief.
tiger_problem.agent.update_history(action, real_observation)
planner.update(tiger_problem.agent, action, real_observation)
if isinstance(planner, pomdp_py.POUCT):
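As a point of reference for the new comment: with POMCP the agent's particle belief is updated inside planner.update, while with POUCT and a histogram belief the update is done explicitly. A rough sketch, assuming pomdp_py's Histogram belief utilities and the surrounding variables:

    # Sketch: explicit belief update for a histogram belief (e.g. when
    # using POUCT). With POMCP, planner.update above already updates the
    # agent's particle belief, so this step is not needed.
    if isinstance(tiger_problem.agent.cur_belief, pomdp_py.Histogram):
        new_belief = pomdp_py.update_histogram_belief(
            tiger_problem.agent.cur_belief,
            action, real_observation,
            tiger_problem.agent.observation_model,
            tiger_problem.agent.transition_model)
        tiger_problem.agent.set_belief(new_belief)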