I am working on a POMDP called VaryingMountainHike (which I have redubbed MordorHike 😉). It was introduced here. It is a 2D environment with mountains defined by three Gaussians; the agent has to travel from the bottom left to the top right. Rewards are always negative, and they are least negative on top of the mountains.
I am totally new to Julia and POMDP planners, so bear with me if I have made some bugs in the implementation.
using POMDPs
using POMDPTools
using POMDPModels
using POMCPOW
using Distributions
using LinearAlgebra
using Random
mutable struct MordorHikePOMDP <: POMDP{Vector{Float64}, Int64, Float64}
    # Constants
    translate_step::Float64
    translate_std::Float64
    obs_std::Float64
    discount::Float64
    # Map bounds and positions
    map_lower_bound::Vector{Float64}
    map_upper_bound::Vector{Float64}
    fixed_start_pos::Vector{Float64}
    goal_position::Vector{Float64}
    # Mountain parameters
    slope::Vector{Float64}
    mvn_1::MvNormal
    mvn_2::MvNormal
    mvn_3::MvNormal
    # Constructor
    function MordorHikePOMDP()
        new(
            0.1,           # translate_step
            0.05,          # translate_std
            0.1,           # obs_std
            0.99,          # discount
            [-1.0, -1.0],  # map_lower_bound
            [1.0, 1.0],    # map_upper_bound
            [-0.8, -0.8],  # fixed_start_pos
            [0.8, 0.8],    # goal_position
            [0.2, 0.2],    # slope
            MvNormal([0.0, 0.0], [0.005 0.0; 0.0 1.0]),  # mvn_1
            MvNormal([0.0, -0.8], [1.0 0.0; 0.0 0.01]),  # mvn_2
            MvNormal([0.0, 0.8], [1.0 0.0; 0.0 0.01])    # mvn_3
        )
    end
end
# State: [x, y, θ]
# Action: 1=north, 2=south, 3=east, 4=west
# Observation: [altitude]
function POMDPs.actions(pomdp::MordorHikePOMDP)
    return [1, 2, 3, 4]
end
function POMDPs.initialstate(pomdp::MordorHikePOMDP)
    # Random rotation for medium difficulty
    return ImplicitDistribution(rng -> [pomdp.fixed_start_pos[1], pomdp.fixed_start_pos[2], rand(rng, [0.0, π/2, π, 3π/2])])
end
function POMDPs.reward(pomdp::MordorHikePOMDP, s::Vector{Float64}, a::Int64, sp::Vector{Float64})
    if isterminal(pomdp, s)
        return 0.0
    else
        return calculate_altitude(pomdp, sp[1:2])
    end
end
POMDPs.discount(pomdp::MordorHikePOMDP) = pomdp.discount
function POMDPs.isterminal(pomdp::MordorHikePOMDP, s::Vector{Float64})
    return norm(s[1:2] - pomdp.goal_position) <= 2 * pomdp.translate_step
end
function POMDPs.transition(pomdp::MordorHikePOMDP, s::Vector{Float64}, a::Int64)
    return ImplicitDistribution(function (rng)
        directions = Dict(
            1 => [0.0, 1.0],   # North
            2 => [0.0, -1.0],  # South
            3 => [1.0, 0.0],   # East
            4 => [-1.0, 0.0]   # West
        )
        # Apply movement
        direction = directions[a]
        next_pos = s[1:2] + direction * pomdp.translate_step
        # Add Gaussian noise to the position
        next_pos += rand(rng, Normal(0, pomdp.translate_std), 2)
        # Clip to map bounds
        next_pos = clamp.(next_pos, pomdp.map_lower_bound, pomdp.map_upper_bound)
        # Keep the same theta
        return [next_pos[1], next_pos[2], s[3]]
    end)
end
function POMDPs.observation(pomdp::MordorHikePOMDP, a::Int64, sp::Vector{Float64})
    altitude = calculate_altitude(pomdp, sp[1:2])
    return Normal(altitude, pomdp.obs_std)
end
function POMDPs.observation(pomdp::MordorHikePOMDP, s::Vector{Float64}, a::Int64, sp::Vector{Float64})
    return observation(pomdp, a, sp)
end
function calculate_altitude(pomdp::MordorHikePOMDP, pos::Vector{Float64})
    # Take only the x, y coordinates in case pos also carries θ
    pos_2d = pos[1:2]
    mountains = [
        pdf(pomdp.mvn_1, pos_2d),
        pdf(pomdp.mvn_2, pos_2d),
        pdf(pomdp.mvn_3, pos_2d)
    ]
    altitude = maximum(mountains)
    return -exp(-altitude) + dot(pos_2d, pomdp.slope) - 0.02
end
pomdp = MordorHikePOMDP()
# Use the POMCPOW solver
solver = POMCPOWSolver(criterion=MaxUCB(20.0))
planner = solve(solver, pomdp)
# Simulate
hr = HistoryRecorder(max_steps=200)
hist = simulate(hr, pomdp, planner)
for (s, b, a, r, sp, o) in eachstep(hist, "(s,b,a,r,sp,o)")
    @show s, a, r, sp
    println(isterminal(pomdp, s))
end
rhist = simulate(hr, pomdp, RandomPolicy(pomdp))
println("""
    Cumulative Discounted Reward (for 1 simulation)
        Random: $(discounted_reward(rhist))
        POMCPOW: $(discounted_reward(hist))
    """)
I need help figuring out reasonable hyperparameters for the solver on this problem. When I used the default tree queries, the rollout terminated before max_steps without reaching the terminal state.
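For reference, these are the `POMCPOWSolver` keyword arguments I understand to be the main tuning knobs; the values below are only a starting-point sketch I put together, not tuned numbers:

```julia
# Sketch of the POMCPOW parameters that seem most relevant here.
# Values are placeholders to experiment with, not recommendations.
solver = POMCPOWSolver(
    tree_queries = 10_000,      # simulations per planning step (default is much lower)
    max_depth = 50,             # search/rollout depth; with discount 0.99 the
                                # effective horizon is long, so a shallow default
                                # depth may cut rollouts off before the goal
    criterion = MaxUCB(20.0),   # UCB exploration constant
    k_observation = 4.0,        # observation progressive-widening parameters
    alpha_observation = 1/8
)
```

Is my intuition right that with the defaults the search is simply too shallow and narrow to ever reach the goal, or are there other parameters I should be looking at?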