From 870d760b45a50d52b8d3088c7e1eb2a4efb5b977 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 27 Jan 2021 19:11:31 -0800 Subject: [PATCH 001/162] Update to a more recent version of rand. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cea9a90..fc0eceb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ keywords = ["ai", "game", "minimax", "negamax"] license = "MIT" [dependencies] -rand = "0.3.*" +rand = "0.4.*" [profile.test] opt-level = 3 From 2e14f54c3ce2b2d948ebc2ea79be78c74a01600b Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 27 Jan 2021 19:17:03 -0800 Subject: [PATCH 002/162] Fix all build warnings from stable rustc. --- examples/ttt.rs | 30 +++++++++++++++--------------- src/interface.rs | 3 --- src/util.rs | 2 +- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/examples/ttt.rs b/examples/ttt.rs index 02f74e3..27ba946 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -69,21 +69,21 @@ impl Default for Board { impl Display for Board { fn fmt(&self, f: &mut Formatter) -> Result { - try!(writeln!(f, - "{} | {} | {}", - self.squares[0], - self.squares[1], - self.squares[2])); - try!(writeln!(f, - "{} | {} | {}", - self.squares[3], - self.squares[4], - self.squares[5])); - try!(writeln!(f, - "{} | {} | {}", - self.squares[6], - self.squares[7], - self.squares[8])); + writeln!(f, + "{} | {} | {}", + self.squares[0], + self.squares[1], + self.squares[2])?; + writeln!(f, + "{} | {} | {}", + self.squares[3], + self.squares[4], + self.squares[5])?; + writeln!(f, + "{} | {} | {}", + self.squares[6], + self.squares[7], + self.squares[8])?; Ok(()) } } diff --git a/src/interface.rs b/src/interface.rs index d9b8661..236bbbc 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -90,10 +90,8 @@ pub trait Move { /// The type of game that the move affects. type G: Game; /// Change the state of `S` so that the move is applied. 
- #[inline] fn apply(&self, &mut ::S); /// Revert the state of `S` so that the move is undone. - #[inline] fn undo(&self, &mut ::S); } @@ -125,7 +123,6 @@ pub trait Game : Sized { /// memory for the slice to be a stack-allocated array. One stable, this /// trait will be extended with an associated constant to specify the /// maximum number of moves. - #[inline] fn generate_moves(&Self::S, Player, &mut [Option]) -> usize; /// Returns `Some(Competitor(winning_player))` if there's a winner, diff --git a/src/util.rs b/src/util.rs index 2e44993..19c4d57 100644 --- a/src/util.rs +++ b/src/util.rs @@ -15,7 +15,7 @@ pub fn battle_royale(s1: &mut S1, s2: &mut S2) -> interface::Winner S2: interface::Strategy { let mut state = G::S::default(); - let mut strategies: Vec<(interface::Player, &mut interface::Strategy)> = vec![ + let mut strategies: [(interface::Player, &mut dyn interface::Strategy); 2] = [ (interface::Player::Computer, s1), (interface::Player::Opponent, s2), ]; From 9195125def30d00c899247861508a88f28d0510f Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 3 Feb 2021 19:47:31 -0800 Subject: [PATCH 003/162] Remove Player from the API interface. Game::State must keep track of player. Benefits: * API simplification: the Strategy, Winner, and Evaluator do not need to be passed the current player. * The caller of choose_move cannot move the same player twice in a row. * The Game implementer does not need to worry that a caller or Strategy will feed it bogus Players. * Simplify Evaluator by automatically handling terminal states. Drawbacks: * Game state must keep track of current player. 
--- benches/negamax.rs | 6 +-- examples/ttt.rs | 106 +++++++++++++++++--------------------- src/interface.rs | 91 +++++++++++--------------------- src/lib.rs | 2 +- src/strategies/negamax.rs | 22 ++++---- src/strategies/random.rs | 4 +- src/util.rs | 21 ++++---- tests/ttt.rs | 5 +- 8 files changed, 109 insertions(+), 148 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index e4ba293..8403fa8 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -26,7 +26,7 @@ impl Game for Noop { type S = Board; type M = Place; - fn generate_moves(_: &Board, _: Player, ms: &mut [Option]) -> usize { + fn generate_moves(_: &Board, ms: &mut [Option]) -> usize { const NUM_MOVES: usize = 4; for m in ms.iter_mut().take(NUM_MOVES) { *m = Some(Place); @@ -43,7 +43,7 @@ impl Game for Noop { impl Evaluator for Eval { type G = Noop; - fn evaluate(_: &Board, _: Option) -> Evaluation { + fn evaluate(_: &Board) -> Evaluation { Evaluation::Score(0) } } @@ -52,5 +52,5 @@ impl Evaluator for Eval { fn bench_negamax(b: &mut Bencher) { let board = Board; let mut s = Negamax::::new(Options { max_depth: 10 }); - b.iter(|| s.choose_move(&board, Player::Computer)); + b.iter(|| s.choose_move(&board)); } diff --git a/examples/ttt.rs b/examples/ttt.rs index 27ba946..8598d3e 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -9,7 +9,6 @@ extern crate minimax; use std::default::Default; use std::fmt::{Display, Formatter, Result}; -use std::convert::From; #[derive(Copy, Clone, PartialEq, Eq)] #[repr(u8)] @@ -19,6 +18,16 @@ pub enum Square { O, } +impl Square { + fn invert(&self) -> Self { + match *self { + Square::Empty => Square::Empty, + Square::X => Square::O, + Square::O => Square::X, + } + } +} + impl Default for Square { fn default() -> Square { Square::Empty @@ -37,33 +46,21 @@ impl Display for Square { } } -impl From for Square { - fn from(p: minimax::Player) -> Square { - match p { - minimax::Player::Computer => Square::X, - minimax::Player::Opponent => Square::O, - } - } -} - 
-impl From for minimax::Player { - fn from(s: Square) -> minimax::Player { - match s { - Square::X => minimax::Player::Computer, - Square::O => minimax::Player::Opponent, - _ => panic!("From::from(Square::Empty))"), - } - } -} - #[derive(Clone, PartialEq, Eq)] pub struct Board { squares: [Square; 9], + to_move: Square, +} + +impl Board { + fn just_moved(&self) -> Square { + self.to_move.invert() + } } impl Default for Board { fn default() -> Board { - Board { squares: [Square::default(); 9] } + Board { squares: [Square::default(); 9], to_move: Square::X } } } @@ -94,14 +91,11 @@ impl minimax::Game for Game { type S = Board; type M = Place; - fn generate_moves(b: &Board, p: minimax::Player, ms: &mut [Option]) -> usize { + fn generate_moves(b: &Board, ms: &mut [Option]) -> usize { let mut j = 0; for i in 0..b.squares.len() { if b.squares[i] == Square::Empty { - ms[j] = Some(Place { - i: i as u8, - s: From::from(p), - }); + ms[j] = Some(Place { i: i as u8 }); j += 1; } } @@ -110,40 +104,42 @@ impl minimax::Game for Game { } fn get_winner(b: &Board) -> Option { + // A player can only cause themselves to win on their turn, so only check for that. 
+ // horizontal wins - if b.squares[0] != Square::Empty && b.squares[0] == b.squares[1] && + if b.squares[0] == b.just_moved() && b.squares[0] == b.squares[1] && b.squares[1] == b.squares[2] { - return Some(minimax::Winner::Competitor(From::from(b.squares[0]))); + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[3] != Square::Empty && b.squares[3] == b.squares[4] && + if b.squares[3] == b.just_moved() && b.squares[3] == b.squares[4] && b.squares[4] == b.squares[5] { - return Some(minimax::Winner::Competitor(From::from(b.squares[3]))); + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[6] != Square::Empty && b.squares[6] == b.squares[7] && + if b.squares[6] == b.just_moved() && b.squares[6] == b.squares[7] && b.squares[7] == b.squares[8] { - return Some(minimax::Winner::Competitor(From::from(b.squares[6]))); + return Some(minimax::Winner::PlayerJustMoved); } // vertical wins - if b.squares[0] != Square::Empty && b.squares[0] == b.squares[3] && + if b.squares[0] == b.just_moved() && b.squares[0] == b.squares[3] && b.squares[3] == b.squares[6] { - return Some(minimax::Winner::Competitor(From::from(b.squares[0]))); + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[1] != Square::Empty && b.squares[1] == b.squares[4] && + if b.squares[1] == b.just_moved() && b.squares[1] == b.squares[4] && b.squares[4] == b.squares[7] { - return Some(minimax::Winner::Competitor(From::from(b.squares[1]))); + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[2] != Square::Empty && b.squares[2] == b.squares[5] && + if b.squares[2] == b.just_moved() && b.squares[2] == b.squares[5] && b.squares[5] == b.squares[8] { - return Some(minimax::Winner::Competitor(From::from(b.squares[2]))); + return Some(minimax::Winner::PlayerJustMoved); } // diagonal wins - if b.squares[0] != Square::Empty && b.squares[0] == b.squares[4] && + if b.squares[0] == b.just_moved() && b.squares[0] == b.squares[4] && b.squares[4] == b.squares[8] { - return 
Some(minimax::Winner::Competitor(From::from(b.squares[0]))); + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[2] != Square::Empty && b.squares[2] == b.squares[4] && + if b.squares[2] == b.just_moved() && b.squares[2] == b.squares[4] && b.squares[4] == b.squares[6] { - return Some(minimax::Winner::Competitor(From::from(b.squares[2]))); + return Some(minimax::Winner::PlayerJustMoved); } // draws if b.squares.iter().all(|s| *s != Square::Empty) { @@ -158,22 +154,23 @@ impl minimax::Game for Game { #[derive(Copy, Clone, PartialEq, Eq)] pub struct Place { i: u8, - s: Square, } impl Display for Place { fn fmt(&self, f: &mut Formatter) -> Result { - write!(f, "{}@{}", self.s, self.i) + write!(f, "@{}", self.i) } } impl minimax::Move for Place { type G = Game; fn apply(&self, b: &mut Board) { - b.squares[self.i as usize] = self.s; + b.squares[self.i as usize] = b.to_move; + b.to_move = b.to_move.invert(); } fn undo(&self, b: &mut Board) { b.squares[self.i as usize] = Square::Empty; + b.to_move = b.to_move.invert(); } } @@ -182,14 +179,7 @@ pub struct Evaluator; impl minimax::Evaluator for Evaluator { type G = Game; // adapted from http://www.cs.olemiss.edu/~dwilkins/CSCI531/tic.c - fn evaluate(b: &Board, mw: Option) -> minimax::Evaluation { - match mw { - Some(minimax::Winner::Competitor(wp)) => match wp { - minimax::Player::Computer => return minimax::Evaluation::Best, - minimax::Player::Opponent => return minimax::Evaluation::Worst, - }, - _ => {} - } + fn evaluate(b: &Board) -> minimax::Evaluation { let mut score = 0; // 3rd: check for doubles @@ -231,7 +221,7 @@ impl minimax::Evaluator for Evaluator { if b.squares[4] == Square::O { score -= 5; } - minimax::Evaluation::Score(score) + minimax::Evaluation::Score(if b.to_move == Square::X { score } else { -score }) } } @@ -241,14 +231,14 @@ fn main() { let mut b = Board::default(); let mut strategies = vec![ - (minimax::Player::Computer, Negamax::::new(Options { max_depth: 10 })), - 
(minimax::Player::Opponent, Negamax::::new(Options { max_depth: 10 })), + Negamax::::new(Options { max_depth: 10 }), + Negamax::::new(Options { max_depth: 10 }), ]; let mut s = 0; while self::Game::get_winner(&b).is_none() { println!("{}", b); - let (p, ref mut strategy) = strategies[s]; - match strategy.choose_move(&mut b, p) { + let ref mut strategy = strategies[s]; + match strategy.choose_move(&mut b) { Some(m) => m.apply(&mut b), None => break, } diff --git a/src/interface.rs b/src/interface.rs index 236bbbc..a4b76f8 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -2,35 +2,13 @@ use std::ops; -/// A competitor within a game. -/// -/// For simplicity, only two players are supported. Their values correspond to -/// the "color" parameter in Negamax. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[repr(i8)] -pub enum Player { - Computer = 1, - Opponent = -1, -} - -/// Negating a player results in the opposite one. -impl ops::Neg for Player { - type Output = Player; - #[inline] - fn neg(self) -> Player { - match self { - Player::Computer => Player::Opponent, - Player::Opponent => Player::Computer, - } - } -} - -/// An assessment of a game state from a particular player's perspective. +/// An assessment of a game state from the perspective of the player about to move. #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum Evaluation { /// An absolutely disastrous outcome, e.g. a loss. Worst, /// An outcome with some score. Higher values mean a more favorable state. + /// A draw is defined as a score of zero. Score(i64), /// An absolutely wonderful outcome, e.g. a win. Best, @@ -50,36 +28,13 @@ impl ops::Neg for Evaluation { } } -/// Multiplying a player and an evaluation negates the latter iff the former -/// is `Opponent`. 
-impl ops::Mul for Player { - type Output = Evaluation; - #[inline] - fn mul(self, e: Evaluation) -> Evaluation { - match self { - Player::Computer => e, - Player::Opponent => -e, - } - } -} - /// Evaluates a game's positions. -/// -/// The methods are defined recursively, so that implementing one is sufficient. pub trait Evaluator { /// The type of game that can be evaluated. type G: Game; - /// Evaluate the state from the persective of `Player::Computer`. - #[inline] - fn evaluate(s: &::S, mw: Option) -> Evaluation { - Self::evaluate_for(s, mw, Player::Computer) - } - - /// Evaluate the state from the given player's persective. - #[inline] - fn evaluate_for(s: &::S, mw: Option, p: Player) -> Evaluation { - p * Self::evaluate(s, mw) - } + /// Evaluate the non-terminal state from the persective of the player to + /// move next. + fn evaluate(s: &::S) -> Evaluation; } /// Defines how a move affects the game state. @@ -98,10 +53,26 @@ pub trait Move { /// The result of playing a game until it finishes. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Winner { - /// A player won. - Competitor(Player), + /// The player who made the last move won. + PlayerJustMoved, /// Nobody won. Draw, + /// The player who made the last move lost. + /// + /// This is uncommon, and many games (chess, checkers, tic-tac-toe, etc) + /// do not have this possibility. + PlayerToMove, +} + +impl Winner { + /// Canonical evaluations for end states. + pub fn evaluate(&self) -> Evaluation { + match *self { + Winner::PlayerJustMoved => Evaluation::Worst, + Winner::PlayerToMove => Evaluation::Best, + Winner::Draw => Evaluation::Score(0), + } + } } /// Defines the rules for a two-player, perfect-knowledge game. @@ -114,24 +85,24 @@ pub trait Game : Sized { /// The type of game moves. type M: Move; - /// Generate moves for a player at the given state. After finishing, the - /// next entry in the slice should be set to `None` to indicate the end. - /// Returns the number of moves generated. 
+ /// Generate moves at the given state. After finishing, the next entry in + /// the slice should be set to `None` to indicate the end. Returns the + /// number of moves generated. /// /// Currently, there's a deficiency that all strategies assume that at most /// 100 moves may be generated for any position, which allows the underlying - /// memory for the slice to be a stack-allocated array. One stable, this + /// memory for the slice to be a stack-allocated array. Once stable, this /// trait will be extended with an associated constant to specify the /// maximum number of moves. - fn generate_moves(&Self::S, Player, &mut [Option]) -> usize; + fn generate_moves(&Self::S, &mut [Option]) -> usize; - /// Returns `Some(Competitor(winning_player))` if there's a winner, + /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if /// the state is non-terminal. fn get_winner(&Self::S) -> Option; } -/// Defines a method of choosing a move for either player in a any game. +/// Defines a method of choosing a move for the current player. 
pub trait Strategy { - fn choose_move(&mut self, &G::S, Player) -> Option; + fn choose_move(&mut self, &G::S) -> Option; } diff --git a/src/lib.rs b/src/lib.rs index 70ec9b9..c384f15 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,5 +4,5 @@ pub mod interface; pub mod strategies; pub mod util; -pub use interface::{Evaluation, Evaluator, Game, Move, Player, Strategy, Winner}; +pub use interface::{Evaluation, Evaluator, Game, Move, Strategy, Winner}; pub use strategies::negamax::{Negamax, Options}; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index a9c0778..fbe9f7e 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -13,21 +13,22 @@ use std::marker::PhantomData; fn negamax(s: &mut ::S, depth: usize, mut alpha: Evaluation, - beta: Evaluation, - p: Player) + beta: Evaluation) -> Evaluation where <::G as Game>::M: Copy { - let maybe_winner = E::G::get_winner(s); - if depth == 0 || maybe_winner.is_some() { - return p * E::evaluate(s, maybe_winner); + if let Some(winner) = E::G::get_winner(s) { + return winner.evaluate(); + } + if depth == 0 { + return E::evaluate(s); } let mut moves = [None; 100]; - E::G::generate_moves(s, p, &mut moves); + E::G::generate_moves(s, &mut moves); let mut best = Evaluation::Worst; for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { m.apply(s); - let value = -negamax::(s, depth - 1, -beta, -alpha, -p); + let value = -negamax::(s, depth - 1, -beta, -alpha); m.undo(s); best = max(best, value); alpha = max(alpha, value); @@ -63,10 +64,10 @@ impl Negamax { impl Strategy for Negamax where ::S: Clone, ::M: Copy { - fn choose_move(&mut self, s: &::S, p: Player) -> Option<::M> { + fn choose_move(&mut self, s: &::S) -> Option<::M> { let mut best = Evaluation::Worst; let mut moves = [None; 100]; - E::G::generate_moves(s, p, &mut moves); + E::G::generate_moves(s, &mut moves); let mut candidate_moves = Vec::new(); let mut s_clone = s.clone(); for m in moves.iter().take_while(|m| 
m.is_some()).map(|m| m.unwrap()) { @@ -75,8 +76,7 @@ impl Strategy for Negamax let value = -negamax::(&mut s_clone, self.opts.max_depth, Evaluation::Worst, - Evaluation::Best, - -p); + Evaluation::Best); m.undo(&mut s_clone); // this move is a candidate move if value == best { diff --git a/src/strategies/random.rs b/src/strategies/random.rs index 56b560c..e7c2ef2 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -16,9 +16,9 @@ impl Random { impl Strategy for Random where G::M: Copy { - fn choose_move(&mut self, s: &G::S, p: Player) -> Option { + fn choose_move(&mut self, s: &G::S) -> Option { let mut moves: [Option; 100] = [None; 100]; - match G::generate_moves(s, p, &mut moves) { + match G::generate_moves(s, &mut moves) { 0 => None, num_moves => Some(moves[self.rng.gen_range(0, num_moves)].unwrap()), } diff --git a/src/util.rs b/src/util.rs index 19c4d57..4977de1 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,27 +6,28 @@ use std::default::Default; /// Play a complete, new game with players using the two provided strategies. /// -/// The first strategy will be `Player::Computer`, the other `Player::Opponent`. -/// Returns result of the game. -pub fn battle_royale(s1: &mut S1, s2: &mut S2) -> interface::Winner +/// Returns `None` if the game ends in a draw, or `Some(0)`, `Some(1)` if the +/// first or second strategy won, respectively. 
+pub fn battle_royale(s1: &mut S1, s2: &mut S2) -> Option where G: interface::Game, G::S: Default, S1: interface::Strategy, S2: interface::Strategy { let mut state = G::S::default(); - let mut strategies: [(interface::Player, &mut dyn interface::Strategy); 2] = [ - (interface::Player::Computer, s1), - (interface::Player::Opponent, s2), - ]; + let mut strategies: [&mut dyn interface::Strategy; 2] = [s1, s2]; let mut s = 0; while G::get_winner(&state).is_none() { - let (p, ref mut strategy) = strategies[s]; - match strategy.choose_move(&mut state, p) { + let ref mut strategy = strategies[s]; + match strategy.choose_move(&mut state) { Some(m) => m.apply(&mut state), None => break, } s = 1 - s; } - G::get_winner(&state).unwrap() + match G::get_winner(&state).unwrap() { + interface::Winner::Draw => None, + interface::Winner::PlayerJustMoved => Some(1-s), + interface::Winner::PlayerToMove => Some(s), + } } diff --git a/tests/ttt.rs b/tests/ttt.rs index 796677e..62f7e95 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -12,7 +12,7 @@ fn test_ttt_negamax_always_draws() { let mut s1 = Negamax::::new(Options { max_depth: 10 }); let mut s2 = Negamax::::new(Options { max_depth: 10 }); for _ in 0..100 { - assert!(battle_royale(&mut s1, &mut s2) == minimax::Winner::Draw) + assert_eq!(battle_royale(&mut s1, &mut s2), None); } } @@ -25,7 +25,6 @@ fn test_ttt_negamax_vs_random_always_wins_or_draws() { let mut s1 = Negamax::::new(Options { max_depth: 10 }); let mut s2 = Random::new(); for _ in 0..100 { - assert!(battle_royale(&mut s1, &mut s2) != - minimax::Winner::Competitor(minimax::Player::Opponent)) + assert_ne!(battle_royale(&mut s1, &mut s2), Some(1)); } } From 5529dd5365f9dba6e89587ecddf3b521bf16e2bd Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 3 Feb 2021 19:53:29 -0800 Subject: [PATCH 004/162] Increase maximum moves to 200. Benchmark gets 3% slower. 
--- src/interface.rs | 2 +- src/strategies/negamax.rs | 4 ++-- src/strategies/random.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index a4b76f8..afb00d8 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -90,7 +90,7 @@ pub trait Game : Sized { /// number of moves generated. /// /// Currently, there's a deficiency that all strategies assume that at most - /// 100 moves may be generated for any position, which allows the underlying + /// 200 moves may be generated for any position, which allows the underlying /// memory for the slice to be a stack-allocated array. Once stable, this /// trait will be extended with an associated constant to specify the /// maximum number of moves. diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index fbe9f7e..489b3f9 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -23,7 +23,7 @@ fn negamax(s: &mut ::S, if depth == 0 { return E::evaluate(s); } - let mut moves = [None; 100]; + let mut moves = [None; 200]; E::G::generate_moves(s, &mut moves); let mut best = Evaluation::Worst; for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { @@ -66,7 +66,7 @@ impl Strategy for Negamax ::M: Copy { fn choose_move(&mut self, s: &::S) -> Option<::M> { let mut best = Evaluation::Worst; - let mut moves = [None; 100]; + let mut moves = [None; 200]; E::G::generate_moves(s, &mut moves); let mut candidate_moves = Vec::new(); let mut s_clone = s.clone(); diff --git a/src/strategies/random.rs b/src/strategies/random.rs index e7c2ef2..b60d52c 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -17,7 +17,7 @@ impl Random { impl Strategy for Random where G::M: Copy { fn choose_move(&mut self, s: &G::S) -> Option { - let mut moves: [Option; 100] = [None; 100]; + let mut moves: [Option; 200] = [None; 200]; match G::generate_moves(s, &mut moves) { 0 => None, num_moves => Some(moves[self.rng.gen_range(0, 
num_moves)].unwrap()), From fd8e4d9545efecb9fca8f3018364acfd5c955b1a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 4 Feb 2021 21:37:11 -0800 Subject: [PATCH 005/162] Use alpha-beta pruning at the top level of negamax. Since nodes that get cut off can return the best score so far (which they know they can't beat), we permute the initial order instead of choosing randomly from the set of equally-performing nodes. The benchmark gets about 2x faster, but individual runs are more variable, as exploring best to worst will cut off a lot more than exploring worst to best. It will always be strictly faster than the previous version. --- src/strategies/negamax.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 489b3f9..7594f58 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -67,8 +67,12 @@ impl Strategy for Negamax fn choose_move(&mut self, s: &::S) -> Option<::M> { let mut best = Evaluation::Worst; let mut moves = [None; 200]; - E::G::generate_moves(s, &mut moves); - let mut candidate_moves = Vec::new(); + let n = E::G::generate_moves(s, &mut moves); + // Randomly permute order that we look at the moves. + // We'll pick the first best score from this list. + self.rng.shuffle(&mut moves[..n]); + + let mut best_move = moves.iter().next()?.unwrap(); let mut s_clone = s.clone(); for m in moves.iter().take_while(|m| m.is_some()).map(|m| m.unwrap()) { // determine value for this move @@ -76,22 +80,14 @@ impl Strategy for Negamax let value = -negamax::(&mut s_clone, self.opts.max_depth, Evaluation::Worst, - Evaluation::Best); + -best); m.undo(&mut s_clone); - // this move is a candidate move - if value == best { - candidate_moves.push(m); - // this move is better than any previous, so it's the sole candidate - } else if value > best { - candidate_moves.clear(); - candidate_moves.push(m); + // Strictly better than any move found so far. 
+ if value > best { best = value; + best_move = m; } } - if candidate_moves.is_empty() { - None - } else { - Some(candidate_moves[self.rng.gen_range(0, candidate_moves.len())]) - } + Some(best_move) } } From feb37b2e05c5e020114516cb10e51ff1b91fd6b4 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 13 Feb 2021 14:02:58 -0800 Subject: [PATCH 006/162] Convert Evaluation from an enum to an alias for i32. This serves two purposes: 1) 64 bits seems unnecessary, and 32 bits without an enum tag allows a transposition table to fit more values. 2) A raw i32 allows parallel Strategies to compare and update values with simple atomic operations. --- examples/ttt.rs | 2 +- src/interface.rs | 42 +++++++++++++-------------------------- src/strategies/negamax.rs | 6 +++--- 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/examples/ttt.rs b/examples/ttt.rs index 8598d3e..d19a3d4 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -221,7 +221,7 @@ impl minimax::Evaluator for Evaluator { if b.squares[4] == Square::O { score -= 5; } - minimax::Evaluation::Score(if b.to_move == Square::X { score } else { -score }) + if b.to_move == Square::X { score } else { -score } } } diff --git a/src/interface.rs b/src/interface.rs index afb00d8..29258cb 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -1,32 +1,18 @@ //! The common structures and traits. -use std::ops; - /// An assessment of a game state from the perspective of the player about to move. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub enum Evaluation { - /// An absolutely disastrous outcome, e.g. a loss. - Worst, - /// An outcome with some score. Higher values mean a more favorable state. - /// A draw is defined as a score of zero. - Score(i64), - /// An absolutely wonderful outcome, e.g. a win. - Best, -} +/// Higher values mean a more favorable state. +/// A draw is defined as a score of zero. 
+pub type Evaluation = i32; -/// Negating an evaluation results in the corresponding one from the other -/// player's persective. -impl ops::Neg for Evaluation { - type Output = Evaluation; - #[inline] - fn neg(self) -> Evaluation { - match self { - Evaluation::Worst => Evaluation::Best, - Evaluation::Score(s) => Evaluation::Score(-s), - Evaluation::Best => Evaluation::Worst, - } - } -} +// These definitions ensure that they negate to each other, but it leaves +// i32::MIN as a valid value less than WORST_EVAL. Don't use this value, and +// any Strategy will panic when it tries to negate it. + +/// An absolutely wonderful outcome, e.g. a win. +pub const BEST_EVAL: Evaluation = i32::MAX; +/// An absolutely disastrous outcome, e.g. a loss. +pub const WORST_EVAL: Evaluation = -BEST_EVAL; /// Evaluates a game's positions. pub trait Evaluator { @@ -68,9 +54,9 @@ impl Winner { /// Canonical evaluations for end states. pub fn evaluate(&self) -> Evaluation { match *self { - Winner::PlayerJustMoved => Evaluation::Worst, - Winner::PlayerToMove => Evaluation::Best, - Winner::Draw => Evaluation::Score(0), + Winner::PlayerJustMoved => WORST_EVAL, + Winner::PlayerToMove => BEST_EVAL, + Winner::Draw => 0, } } } diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 7594f58..47a7587 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -25,7 +25,7 @@ fn negamax(s: &mut ::S, } let mut moves = [None; 200]; E::G::generate_moves(s, &mut moves); - let mut best = Evaluation::Worst; + let mut best = WORST_EVAL; for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { m.apply(s); let value = -negamax::(s, depth - 1, -beta, -alpha); @@ -65,7 +65,7 @@ impl Strategy for Negamax where ::S: Clone, ::M: Copy { fn choose_move(&mut self, s: &::S) -> Option<::M> { - let mut best = Evaluation::Worst; + let mut best = WORST_EVAL; let mut moves = [None; 200]; let n = E::G::generate_moves(s, &mut moves); // Randomly permute order that we look at 
the moves. @@ -79,7 +79,7 @@ impl Strategy for Negamax m.apply(&mut s_clone); let value = -negamax::(&mut s_clone, self.opts.max_depth, - Evaluation::Worst, + WORST_EVAL, -best); m.undo(&mut s_clone); // Strictly better than any move found so far. From a0557e93c8e963bf9a9011ddc91ce89dbdf93579 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 13 Feb 2021 17:24:55 -0800 Subject: [PATCH 007/162] Add a rustfmt config and apply it. --- .rustfmt.toml | 3 ++ benches/negamax.rs | 8 ++- examples/ttt.rs | 104 +++++++++++++++++++++----------------- src/interface.rs | 14 ++--- src/strategies/negamax.rs | 34 +++++-------- src/strategies/random.rs | 4 +- src/util.rs | 15 +++--- tests/ttt.rs | 2 +- 8 files changed, 96 insertions(+), 88 deletions(-) create mode 100644 .rustfmt.toml diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..a835e4e --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,3 @@ +edition = "2018" +fn_args_layout = "Compressed" +use_small_heuristics = "Max" diff --git a/benches/negamax.rs b/benches/negamax.rs index 8403fa8..0fdd418 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -1,8 +1,8 @@ #![feature(test)] extern crate minimax; extern crate test; -use test::Bencher; use minimax::*; +use test::Bencher; #[derive(Clone)] pub struct Board; @@ -16,10 +16,8 @@ pub struct Noop; impl Move for Place { type G = Noop; - fn apply(&self, _: &mut Board) { - } - fn undo(&self, _: &mut Board) { - } + fn apply(&self, _: &mut Board) {} + fn undo(&self, _: &mut Board) {} } impl Game for Noop { diff --git a/examples/ttt.rs b/examples/ttt.rs index d19a3d4..feea5c2 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -20,11 +20,11 @@ pub enum Square { impl Square { fn invert(&self) -> Self { - match *self { - Square::Empty => Square::Empty, - Square::X => Square::O, - Square::O => Square::X, - } + match *self { + Square::Empty => Square::Empty, + Square::X => Square::O, + Square::O => Square::X, + } } } @@ -36,13 +36,15 @@ impl Default for 
Square { impl Display for Square { fn fmt(&self, f: &mut Formatter) -> Result { - write!(f, - "{}", - match *self { - Square::Empty => ' ', - Square::X => 'X', - Square::O => 'O', - }) + write!( + f, + "{}", + match *self { + Square::Empty => ' ', + Square::X => 'X', + Square::O => 'O', + } + ) } } @@ -54,7 +56,7 @@ pub struct Board { impl Board { fn just_moved(&self) -> Square { - self.to_move.invert() + self.to_move.invert() } } @@ -66,21 +68,9 @@ impl Default for Board { impl Display for Board { fn fmt(&self, f: &mut Formatter) -> Result { - writeln!(f, - "{} | {} | {}", - self.squares[0], - self.squares[1], - self.squares[2])?; - writeln!(f, - "{} | {} | {}", - self.squares[3], - self.squares[4], - self.squares[5])?; - writeln!(f, - "{} | {} | {}", - self.squares[6], - self.squares[7], - self.squares[8])?; + writeln!(f, "{} | {} | {}", self.squares[0], self.squares[1], self.squares[2])?; + writeln!(f, "{} | {} | {}", self.squares[3], self.squares[4], self.squares[5])?; + writeln!(f, "{} | {} | {}", self.squares[6], self.squares[7], self.squares[8])?; Ok(()) } } @@ -104,41 +94,57 @@ impl minimax::Game for Game { } fn get_winner(b: &Board) -> Option { - // A player can only cause themselves to win on their turn, so only check for that. + // A player can only cause themselves to win on their turn, so only check for that. 
// horizontal wins - if b.squares[0] == b.just_moved() && b.squares[0] == b.squares[1] && - b.squares[1] == b.squares[2] { + if b.squares[0] == b.just_moved() + && b.squares[0] == b.squares[1] + && b.squares[1] == b.squares[2] + { return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[3] == b.just_moved() && b.squares[3] == b.squares[4] && - b.squares[4] == b.squares[5] { + if b.squares[3] == b.just_moved() + && b.squares[3] == b.squares[4] + && b.squares[4] == b.squares[5] + { return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[6] == b.just_moved() && b.squares[6] == b.squares[7] && - b.squares[7] == b.squares[8] { + if b.squares[6] == b.just_moved() + && b.squares[6] == b.squares[7] + && b.squares[7] == b.squares[8] + { return Some(minimax::Winner::PlayerJustMoved); } // vertical wins - if b.squares[0] == b.just_moved() && b.squares[0] == b.squares[3] && - b.squares[3] == b.squares[6] { + if b.squares[0] == b.just_moved() + && b.squares[0] == b.squares[3] + && b.squares[3] == b.squares[6] + { return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[1] == b.just_moved() && b.squares[1] == b.squares[4] && - b.squares[4] == b.squares[7] { + if b.squares[1] == b.just_moved() + && b.squares[1] == b.squares[4] + && b.squares[4] == b.squares[7] + { return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[2] == b.just_moved() && b.squares[2] == b.squares[5] && - b.squares[5] == b.squares[8] { + if b.squares[2] == b.just_moved() + && b.squares[2] == b.squares[5] + && b.squares[5] == b.squares[8] + { return Some(minimax::Winner::PlayerJustMoved); } // diagonal wins - if b.squares[0] == b.just_moved() && b.squares[0] == b.squares[4] && - b.squares[4] == b.squares[8] { + if b.squares[0] == b.just_moved() + && b.squares[0] == b.squares[4] + && b.squares[4] == b.squares[8] + { return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[2] == b.just_moved() && b.squares[2] == b.squares[4] && - b.squares[4] == b.squares[6] { + if 
b.squares[2] == b.just_moved() + && b.squares[2] == b.squares[4] + && b.squares[4] == b.squares[6] + { return Some(minimax::Winner::PlayerJustMoved); } // draws @@ -221,13 +227,17 @@ impl minimax::Evaluator for Evaluator { if b.squares[4] == Square::O { score -= 5; } - if b.to_move == Square::X { score } else { -score } + if b.to_move == Square::X { + score + } else { + -score + } } } fn main() { - use minimax::{Game, Move, Strategy}; use minimax::strategies::negamax::{Negamax, Options}; + use minimax::{Game, Move, Strategy}; let mut b = Board::default(); let mut strategies = vec![ diff --git a/src/interface.rs b/src/interface.rs index 29258cb..a729072 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -53,11 +53,11 @@ pub enum Winner { impl Winner { /// Canonical evaluations for end states. pub fn evaluate(&self) -> Evaluation { - match *self { - Winner::PlayerJustMoved => WORST_EVAL, - Winner::PlayerToMove => BEST_EVAL, - Winner::Draw => 0, - } + match *self { + Winner::PlayerJustMoved => WORST_EVAL, + Winner::PlayerToMove => BEST_EVAL, + Winner::Draw => 0, + } } } @@ -65,11 +65,11 @@ impl Winner { /// /// A game ties together types for the state and moves, generates the possible /// moves from a particular state, and determines whether a state is terminal. -pub trait Game : Sized { +pub trait Game: Sized { /// The type of the game state. type S; /// The type of game moves. - type M: Move; + type M: Move; /// Generate moves at the given state. After finishing, the next entry in /// the slice should be set to `None` to indicate the end. 
Returns the diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 47a7587..df88fc3 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -10,15 +10,14 @@ use rand::Rng; use std::cmp::max; use std::marker::PhantomData; -fn negamax(s: &mut ::S, - depth: usize, - mut alpha: Evaluation, - beta: Evaluation) - -> Evaluation - where <::G as Game>::M: Copy +fn negamax( + s: &mut ::S, depth: usize, mut alpha: Evaluation, beta: Evaluation, +) -> Evaluation +where + <::G as Game>::M: Copy, { if let Some(winner) = E::G::get_winner(s) { - return winner.evaluate(); + return winner.evaluate(); } if depth == 0 { return E::evaluate(s); @@ -33,7 +32,7 @@ fn negamax(s: &mut ::S, best = max(best, value); alpha = max(alpha, value); if alpha >= beta { - break + break; } } best @@ -53,17 +52,15 @@ pub struct Negamax { impl Negamax { pub fn new(opts: Options) -> Negamax { - Negamax { - opts: opts, - rng: rand::thread_rng(), - _eval: PhantomData, - } + Negamax { opts: opts, rng: rand::thread_rng(), _eval: PhantomData } } } impl Strategy for Negamax - where ::S: Clone, - ::M: Copy { +where + ::S: Clone, + ::M: Copy, +{ fn choose_move(&mut self, s: &::S) -> Option<::M> { let mut best = WORST_EVAL; let mut moves = [None; 200]; @@ -72,15 +69,12 @@ impl Strategy for Negamax // We'll pick the first best score from this list. self.rng.shuffle(&mut moves[..n]); - let mut best_move = moves.iter().next()?.unwrap(); + let mut best_move = (*moves.iter().next()?)?; let mut s_clone = s.clone(); for m in moves.iter().take_while(|m| m.is_some()).map(|m| m.unwrap()) { // determine value for this move m.apply(&mut s_clone); - let value = -negamax::(&mut s_clone, - self.opts.max_depth, - WORST_EVAL, - -best); + let value = -negamax::(&mut s_clone, self.opts.max_depth, WORST_EVAL, -best); m.undo(&mut s_clone); // Strictly better than any move found so far. 
if value > best { diff --git a/src/strategies/random.rs b/src/strategies/random.rs index b60d52c..7e34b7c 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -15,7 +15,9 @@ impl Random { } impl Strategy for Random - where G::M: Copy { +where + G::M: Copy, +{ fn choose_move(&mut self, s: &G::S) -> Option { let mut moves: [Option; 200] = [None; 200]; match G::generate_moves(s, &mut moves) { diff --git a/src/util.rs b/src/util.rs index 4977de1..ed8288d 100644 --- a/src/util.rs +++ b/src/util.rs @@ -9,10 +9,11 @@ use std::default::Default; /// Returns `None` if the game ends in a draw, or `Some(0)`, `Some(1)` if the /// first or second strategy won, respectively. pub fn battle_royale(s1: &mut S1, s2: &mut S2) -> Option - where G: interface::Game, - G::S: Default, - S1: interface::Strategy, - S2: interface::Strategy +where + G: interface::Game, + G::S: Default, + S1: interface::Strategy, + S2: interface::Strategy, { let mut state = G::S::default(); let mut strategies: [&mut dyn interface::Strategy; 2] = [s1, s2]; @@ -26,8 +27,8 @@ pub fn battle_royale(s1: &mut S1, s2: &mut S2) -> Option s = 1 - s; } match G::get_winner(&state).unwrap() { - interface::Winner::Draw => None, - interface::Winner::PlayerJustMoved => Some(1-s), - interface::Winner::PlayerToMove => Some(s), + interface::Winner::Draw => None, + interface::Winner::PlayerJustMoved => Some(1 - s), + interface::Winner::PlayerToMove => Some(s), } } diff --git a/tests/ttt.rs b/tests/ttt.rs index 62f7e95..60953f4 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -1,6 +1,6 @@ extern crate minimax; -#[path="../examples/ttt.rs"] +#[path = "../examples/ttt.rs"] mod ttt; use minimax::util::battle_royale; From ef8060fa5e14fb1f8ce9019bc160c85758b5faad Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 13 Feb 2021 17:39:01 -0800 Subject: [PATCH 008/162] Migrate benchmark to bencher crate to use stable rust. 
--- Cargo.toml | 7 +++++++ benches/negamax.rs | 12 +++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fc0eceb..e22a435 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,5 +12,12 @@ license = "MIT" [dependencies] rand = "0.4.*" +[dev-dependencies] +bencher = "0.1.5" + +[[bench]] +name = "negamax" +harness = false + [profile.test] opt-level = 3 diff --git a/benches/negamax.rs b/benches/negamax.rs index 0fdd418..00b1ca8 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -1,8 +1,8 @@ -#![feature(test)] +#[macro_use] +extern crate bencher; extern crate minimax; -extern crate test; +use bencher::Bencher; use minimax::*; -use test::Bencher; #[derive(Clone)] pub struct Board; @@ -42,13 +42,15 @@ impl Evaluator for Eval { type G = Noop; fn evaluate(_: &Board) -> Evaluation { - Evaluation::Score(0) + 0 } } -#[bench] fn bench_negamax(b: &mut Bencher) { let board = Board; let mut s = Negamax::::new(Options { max_depth: 10 }); b.iter(|| s.choose_move(&board)); } + +benchmark_group!(benches, bench_negamax); +benchmark_main!(benches); From cb179573d94bc2d9666623c1bb57afb5a9eb9a7b Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 13 Feb 2021 22:22:07 -0800 Subject: [PATCH 009/162] Add connect four example game. --- examples/connect4.rs | 196 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 examples/connect4.rs diff --git a/examples/connect4.rs b/examples/connect4.rs new file mode 100644 index 0000000..f16a7bd --- /dev/null +++ b/examples/connect4.rs @@ -0,0 +1,196 @@ +//! A definition of the game Connect Four using the library, for use in tests and benchmarks. + +extern crate minimax; + +use std::default::Default; +use std::fmt::{Display, Formatter, Result}; + +#[derive(Clone)] +pub struct Board { + // Some bitboard ideas from github.com/PascalPons/connect4 + /* bit order example: + * Leaves a blank row on top. 
+ * 5 12 19 26 33 40 47 + * 4 11 18 25 32 39 46 + * 3 10 17 24 31 38 45 + * 2 9 16 23 30 37 44 + * 1 8 15 22 29 36 43 + * 0 7 14 21 28 35 42 + */ + red_pieces: u64, + yellow_pieces: u64, + reds_move: bool, + num_moves: u8, +} + +const NUM_COLS: u32 = 7; +const NUM_ROWS: u32 = 6; +const HEIGHT: u32 = NUM_ROWS + 1; +const COL_MASK: u64 = (1 << NUM_ROWS) - 1; + +impl Board { + fn all_pieces(&self) -> u64 { + self.red_pieces | self.yellow_pieces + } +} + +impl Default for Board { + fn default() -> Board { + Board { red_pieces: 0, yellow_pieces: 0, reds_move: true, num_moves: 0 } + } +} + +impl Display for Board { + fn fmt(&self, f: &mut Formatter) -> Result { + for row in (0..6).rev() { + for col in 0..7 { + write!( + f, + "{}", + if self.red_pieces >> (row + col * HEIGHT) & 1 != 0 { + '\u{1F534}' + } else if self.yellow_pieces >> (row + col * HEIGHT) & 1 != 0 { + '\u{1F7E1}' + } else { + '\u{25ef}' + } + )?; + } + writeln!(f, "")?; + } + Ok(()) + } +} + +#[derive(Copy, Clone)] +pub struct Place { + col: u8, +} + +impl Place { + fn col_shift(&self) -> u32 { + self.col as u32 * HEIGHT + } +} + +impl minimax::Move for Place { + type G = Game; + fn apply(&self, b: &mut Board) { + let col = (b.all_pieces() >> self.col_shift()) & COL_MASK; + let new_piece = (col + 1) << self.col_shift(); + if b.reds_move { + b.red_pieces |= new_piece; + } else { + b.yellow_pieces |= new_piece; + } + b.reds_move = !b.reds_move; + b.num_moves += 1; + } + + fn undo(&self, b: &mut Board) { + let col = (b.all_pieces() >> self.col_shift()) & COL_MASK; + let prev_piece = (col ^ (col >> 1)) << self.col_shift(); + b.reds_move = !b.reds_move; + if b.reds_move { + b.red_pieces &= !prev_piece; + } else { + b.yellow_pieces &= !prev_piece; + } + b.num_moves -= 1; + } +} + +pub struct Game; + +impl minimax::Game for Game { + type S = Board; + type M = Place; + + fn generate_moves(b: &Board, moves: &mut [Option]) -> usize { + let mut n = 0; + let mut cols = b.all_pieces(); + for i in 0..NUM_COLS { + if 
cols & COL_MASK < COL_MASK { + moves[n] = Some(Place { col: i as u8 }); + n += 1; + } + cols >>= HEIGHT; + } + moves[n] = None; + n + } + + fn get_winner(b: &Board) -> Option { + // Position of pieces for the player that just moved. + let pieces = if b.reds_move { b.yellow_pieces } else { b.red_pieces }; + + // Detect pairs of two pieces in a row, then pairs of two pairs in a + // row. + + // Horizontal + let pairs = pieces & (pieces >> HEIGHT); + if pairs & (pairs >> (2 * HEIGHT)) != 0 { + return Some(minimax::Winner::PlayerJustMoved); + } + + // Vertical + let pairs = pieces & (pieces >> 1); + if pairs & (pairs >> 2) != 0 { + return Some(minimax::Winner::PlayerJustMoved); + } + + // Diagonal + let pairs = pieces & (pieces >> (HEIGHT - 1)); + if pairs & (pairs >> (2 * (HEIGHT - 1))) != 0 { + return Some(minimax::Winner::PlayerJustMoved); + } + + // Other diagonal + let pairs = pieces & (pieces >> (HEIGHT + 1)); + if pairs & (pairs >> (2 * (HEIGHT + 1))) != 0 { + return Some(minimax::Winner::PlayerJustMoved); + } + + // Full board with no winner. 
+ if b.num_moves as u32 == NUM_ROWS * NUM_COLS { + Some(minimax::Winner::Draw) + } else { + None + } + } +} + +pub struct DumbEvaluator; + +impl minimax::Evaluator for DumbEvaluator { + type G = Game; + fn evaluate(_: &Board) -> minimax::Evaluation { + 0 + } +} + +fn main() { + use minimax::strategies::negamax::{Negamax, Options}; + use minimax::{Game, Move, Strategy}; + + let mut b = Board::default(); + let mut strategies = vec![ + Negamax::::new(Options { max_depth: 8 }), + Negamax::::new(Options { max_depth: 8 }), + ]; + let mut s = 0; + while self::Game::get_winner(&b).is_none() { + println!("{}", b); + let ref mut strategy = strategies[s]; + match strategy.choose_move(&mut b) { + Some(m) => { + let color = if b.reds_move { "Red" } else { "Yellow" }; + println!("{} piece in column {}", color, m.col + 1); + m.apply(&mut b) + } + None => break, + } + s = 1 - s; + } + println!("{}", b); +} From 51bd581a680ab513c92c0d2a01052503790187f7 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 13 Feb 2021 22:22:42 -0800 Subject: [PATCH 010/162] Tweak winning evaluations to prolong defeat and accelerate victory. --- src/strategies/negamax.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index df88fc3..9455057 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -10,6 +10,19 @@ use rand::Rng; use std::cmp::max; use std::marker::PhantomData; +// For values near winning and losing values, push them slightly closer to zero. +// A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). +// A loss in 5 moves (WORST+5) will be chosen over a loss in 3 moves (WORST+3). 
+fn degrade_wins(value: Evaluation) -> Evaluation { + if value > BEST_EVAL - 100 { + value - 1 + } else if value < WORST_EVAL + 100 { + value + 1 + } else { + value + } +} + fn negamax( s: &mut ::S, depth: usize, mut alpha: Evaluation, beta: Evaluation, ) -> Evaluation @@ -35,7 +48,7 @@ where break; } } - best + degrade_wins(best) } /// Options to use for the `Negamax` engine. @@ -69,7 +82,7 @@ where // We'll pick the first best score from this list. self.rng.shuffle(&mut moves[..n]); - let mut best_move = (*moves.iter().next()?)?; + let mut best_move = moves[0]?; let mut s_clone = s.clone(); for m in moves.iter().take_while(|m| m.is_some()).map(|m| m.unwrap()) { // determine value for this move From 42769e16389af31b7e8740210ca58ef28f2c0dca Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 14 Feb 2021 08:36:38 -0800 Subject: [PATCH 011/162] Add actual evaluator to connect four. --- examples/connect4.rs | 110 +++++++++++++++++++++++++++++++++---------- 1 file changed, 85 insertions(+), 25 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index f16a7bd..649a7eb 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -7,7 +7,7 @@ use std::fmt::{Display, Formatter, Result}; #[derive(Clone)] pub struct Board { - // Some bitboard ideas from github.com/PascalPons/connect4 + // Some bitboard ideas from http://blog.gamesolver.org/solving-connect-four/06-bitboard/ /* bit order example: * Leaves a blank row on top. * 5 12 19 26 33 40 47 @@ -126,28 +126,12 @@ impl minimax::Game for Game { // Detect pairs of two pieces in a row, then pairs of two pairs in a // row. 
+ let matches = |shift| -> bool { + let pairs = pieces & (pieces >> shift); + pairs & (pairs >> 2 * shift) != 0 + }; - // Horizontal - let pairs = pieces & (pieces >> HEIGHT); - if pairs & (pairs >> (2 * HEIGHT)) != 0 { - return Some(minimax::Winner::PlayerJustMoved); - } - - // Vertical - let pairs = pieces & (pieces >> 1); - if pairs & (pairs >> 2) != 0 { - return Some(minimax::Winner::PlayerJustMoved); - } - - // Diagonal - let pairs = pieces & (pieces >> (HEIGHT - 1)); - if pairs & (pairs >> (2 * (HEIGHT - 1))) != 0 { - return Some(minimax::Winner::PlayerJustMoved); - } - - // Other diagonal - let pairs = pieces & (pieces >> (HEIGHT + 1)); - if pairs & (pairs >> (2 * (HEIGHT + 1))) != 0 { + if matches(1) || matches(HEIGHT) || matches(HEIGHT + 1) || matches(HEIGHT - 1) { return Some(minimax::Winner::PlayerJustMoved); } @@ -169,14 +153,90 @@ impl minimax::Evaluator for DumbEvaluator { } } +impl Board { + // Return bitmap of all open locations that would complete a four in a row for the given player. + fn find_fourth_moves(&self, pieces: u64) -> u64 { + let mut all = self.all_pieces(); + // Mark the fake row on top as full to prevent wrapping around. + let mut top_row = COL_MASK + 1; + for _ in 0..NUM_COLS { + all |= top_row; + top_row <<= HEIGHT; + } + + let matches = |shift| -> u64 { + let pairs = pieces & (pieces >> shift); // Pairs of this color. + let singles = (pieces >> shift) & !all | (pieces << shift) & !all; // One of this color and one empty. 
+ (pairs >> shift * 2) & singles | (pairs << shift * 2) & singles + }; + + // Vertical + matches(1) | + // Horizontal + matches(HEIGHT) | + // Diagonal + matches(HEIGHT+1) | + // Other diagonal + matches(HEIGHT-1) + } +} + +pub struct BasicEvaluator; + +impl minimax::Evaluator for BasicEvaluator { + type G = Game; + fn evaluate(b: &Board) -> minimax::Evaluation { + let player_pieces = if b.reds_move { b.red_pieces } else { b.yellow_pieces }; + let opponent_pieces = if b.reds_move { b.yellow_pieces } else { b.red_pieces }; + let mut player_wins = b.find_fourth_moves(player_pieces); + let mut opponent_wins = b.find_fourth_moves(opponent_pieces); + + let mut score = 0; + // Bonus points for moves in the middle columns. + for col in 2..5 { + score += ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; + score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; + } + + // Count columns that cause immediate win. + // Count columns that then allow immediate win. 
+ let mut all = b.all_pieces(); + for _ in 0..NUM_COLS { + let next_move = (all & COL_MASK) + 1; + if next_move > COL_MASK { + continue; + } + if next_move & player_wins != 0 { + score += 10; + } + if next_move & opponent_wins != 0 { + score -= 10; + } + let afterwards_move = next_move << 1; + if afterwards_move & player_wins != 0 { + score += 5; + } + if afterwards_move & opponent_wins != 0 { + score -= 5; + } + + all >>= HEIGHT; + player_wins >>= HEIGHT; + opponent_wins >>= HEIGHT; + } + + score + } +} + fn main() { use minimax::strategies::negamax::{Negamax, Options}; use minimax::{Game, Move, Strategy}; let mut b = Board::default(); - let mut strategies = vec![ - Negamax::::new(Options { max_depth: 8 }), - Negamax::::new(Options { max_depth: 8 }), + let mut strategies: [&mut dyn Strategy; 2] = [ + &mut Negamax::::new(Options { max_depth: 8 }), + &mut Negamax::::new(Options { max_depth: 8 }), ]; let mut s = 0; while self::Game::get_winner(&b).is_none() { From 2a46eedb2d9aa364a0737e1eec7612aafe3142ba Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 14 Feb 2021 21:14:22 -0800 Subject: [PATCH 012/162] Add testing harness for strategies. Ensure they evaluate the same result as an optimization-free negamax. 
--- examples/connect4.rs | 11 +++- src/strategies/negamax.rs | 16 ++++- tests/strategies.rs | 126 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 tests/strategies.rs diff --git a/examples/connect4.rs b/examples/connect4.rs index 649a7eb..a2b76d9 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -29,9 +29,17 @@ const HEIGHT: u32 = NUM_ROWS + 1; const COL_MASK: u64 = (1 << NUM_ROWS) - 1; impl Board { - fn all_pieces(&self) -> u64 { + pub fn all_pieces(&self) -> u64 { self.red_pieces | self.yellow_pieces } + + pub fn pieces_to_move(&self) -> u64 { + if self.reds_move { + self.red_pieces + } else { + self.yellow_pieces + } + } } impl Default for Board { @@ -229,6 +237,7 @@ impl minimax::Evaluator for BasicEvaluator { } } +#[cfg(not(test))] fn main() { use minimax::strategies::negamax::{Negamax, Options}; use minimax::{Game, Move, Strategy}; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 9455057..5ec807a 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -60,12 +60,25 @@ pub struct Options { pub struct Negamax { opts: Options, rng: rand::ThreadRng, + prev_value: Evaluation, _eval: PhantomData, } impl Negamax { pub fn new(opts: Options) -> Negamax { - Negamax { opts: opts, rng: rand::thread_rng(), _eval: PhantomData } + Negamax { opts: opts, rng: rand::thread_rng(), prev_value: 0, _eval: PhantomData } + } + + // Return the value computed for the root node for the last computation. + pub fn root_value(&self) -> Evaluation { + // Undo any value clamping. 
+ if self.prev_value > BEST_EVAL - 100 { + BEST_EVAL + } else if self.prev_value < WORST_EVAL + 100 { + WORST_EVAL + } else { + self.prev_value + } } } @@ -95,6 +108,7 @@ where best_move = m; } } + self.prev_value = best; Some(best_move) } } diff --git a/tests/strategies.rs b/tests/strategies.rs new file mode 100644 index 0000000..89d75fe --- /dev/null +++ b/tests/strategies.rs @@ -0,0 +1,126 @@ +// For a given Evaluator and depth, all Strategies should produce the same +// value for the root. They use different techniques and pruning heuristics +// for speed, but it's all fundamentally the minimax algorithm. This file +// creates fake evaluation trees of connect four, and ensures that all +// Strategies (including a plain negamax without alpha-beta) get the same answer. + +extern crate minimax; +extern crate rand; +#[path = "../examples/connect4.rs"] +mod connect4; + +use minimax::interface::*; +use rand::Rng; +use std::cmp::max; +use std::marker::PhantomData; + +fn negamax(s: &mut ::S, depth: usize) -> Evaluation +where + <::G as Game>::M: Copy, +{ + if let Some(winner) = E::G::get_winner(s) { + return winner.evaluate(); + } + if depth == 0 { + return E::evaluate(s); + } + let mut moves = [None; 200]; + let n = E::G::generate_moves(s, &mut moves); + let mut best = WORST_EVAL; + for m in moves[..n].iter().map(|m| m.unwrap()) { + m.apply(s); + let value = -negamax::(s, depth - 1); + m.undo(s); + best = max(best, value); + } + best +} + +pub struct PlainNegamax { + depth: usize, + root_value: Evaluation, + _eval: PhantomData, +} + +impl PlainNegamax { + pub fn new(depth: usize) -> PlainNegamax { + PlainNegamax { depth: depth, root_value: 0, _eval: PhantomData } + } +} + +impl Strategy for PlainNegamax +where + ::S: Clone, + ::M: Copy, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + let mut moves = [None; 200]; + let n = E::G::generate_moves(s, &mut moves); + + let mut best_move = None; + let mut best_value = WORST_EVAL; + let mut s_clone = s.clone(); + 
for m in moves[..n].iter().map(|m| m.unwrap()) { + m.apply(&mut s_clone); + let value = -negamax::(&mut s_clone, self.depth); + m.undo(&mut s_clone); + if value > best_value { + best_value = value; + best_move = Some(m); + } + } + self.root_value = best_value; + best_move + } +} + +struct RandomEvaluator; + +impl minimax::Evaluator for RandomEvaluator { + type G = connect4::Game; + fn evaluate(b: &connect4::Board) -> minimax::Evaluation { + // Scramble the game state to get a deterministically random Evaluation. + let mut hash = b.pieces_to_move().wrapping_mul(0xe512dc15f0da3dd1); + hash = hash + .wrapping_add(hash >> 33) + .wrapping_add(b.all_pieces()) + .wrapping_mul(0x18d9db91aa689617); + hash = hash.wrapping_add(hash >> 31); + hash as minimax::Evaluation + } +} + +fn generate_random_state(depth: usize) -> connect4::Board { + let mut rng = rand::thread_rng(); + let mut b = connect4::Board::default(); + for _ in 0..depth { + let mut moves = [None; 10]; + let n = connect4::Game::generate_moves(&b, &mut moves); + let m = moves[rng.gen_range(0, n)].unwrap(); + m.apply(&mut b); + if connect4::Game::get_winner(&b).is_some() { + // Oops, undo and try again on the next iter. + m.undo(&mut b); + } + } + b +} + +#[test] +fn compare_strategies() { + for _ in 0..10 { + for max_depth in 0..5 { + let b = generate_random_state(10); + + let mut plain_negamax = PlainNegamax::::new(max_depth); + plain_negamax.choose_move(&b); + let value = plain_negamax.root_value; + + let mut negamax = + minimax::Negamax::::new(minimax::Options { max_depth: max_depth }); + negamax.choose_move(&b); + let negamax_value = negamax.root_value(); + assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); + } + } +} From 167539b2a6013b10fdbd4367e6e4c2b4a345db0a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 14 Feb 2021 23:27:44 -0800 Subject: [PATCH 013/162] Add an iterative search strategy and Zobrist trait. Includes transposition table and timeout. 
--- examples/connect4.rs | 133 +++++++++++++++- src/interface.rs | 11 ++ src/lib.rs | 3 +- src/strategies/iterative.rs | 309 ++++++++++++++++++++++++++++++++++++ src/strategies/mod.rs | 1 + src/strategies/negamax.rs | 5 +- tests/strategies.rs | 14 +- 7 files changed, 466 insertions(+), 10 deletions(-) create mode 100644 src/strategies/iterative.rs diff --git a/examples/connect4.rs b/examples/connect4.rs index a2b76d9..63cee19 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -4,6 +4,7 @@ extern crate minimax; use std::default::Default; use std::fmt::{Display, Formatter, Result}; +use std::time::Duration; #[derive(Clone)] pub struct Board { @@ -21,6 +22,7 @@ pub struct Board { yellow_pieces: u64, reds_move: bool, num_moves: u8, + hash: u64, } const NUM_COLS: u32 = 7; @@ -40,11 +42,24 @@ impl Board { self.yellow_pieces } } + + fn update_hash(&mut self, piece: u64) { + // Lookup the hash for this position and this color. + let position = piece.trailing_zeros() as usize; + let color = self.num_moves as usize & 1; + self.hash ^= HASHES[(position << 1) | color]; + } } impl Default for Board { fn default() -> Board { - Board { red_pieces: 0, yellow_pieces: 0, reds_move: true, num_moves: 0 } + Board { red_pieces: 0, yellow_pieces: 0, reds_move: true, num_moves: 0, hash: 0 } + } +} + +impl minimax::Zobrist for Board { + fn zobrist_hash(&self) -> u64 { + self.hash } } @@ -70,7 +85,7 @@ impl Display for Board { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Eq, PartialEq)] pub struct Place { col: u8, } @@ -93,6 +108,7 @@ impl minimax::Move for Place { } b.reds_move = !b.reds_move; b.num_moves += 1; + b.update_hash(new_piece); } fn undo(&self, b: &mut Board) { @@ -104,6 +120,7 @@ impl minimax::Move for Place { } else { b.yellow_pieces &= !prev_piece; } + b.update_hash(prev_piece); b.num_moves -= 1; } } @@ -239,13 +256,17 @@ impl minimax::Evaluator for BasicEvaluator { #[cfg(not(test))] fn main() { - use minimax::strategies::negamax::{Negamax, Options}; use 
minimax::{Game, Move, Strategy}; + use minimax::{IterativeOptions, IterativeSearch, Negamax, Options}; let mut b = Board::default(); + let opts = IterativeOptions::default() + .with_table_byte_size(1_000_000) + .with_timeout(Duration::from_secs(1)) + .with_max_depth(20); let mut strategies: [&mut dyn Strategy; 2] = [ &mut Negamax::::new(Options { max_depth: 8 }), - &mut Negamax::::new(Options { max_depth: 8 }), + &mut IterativeSearch::::new(opts), ]; let mut s = 0; while self::Game::get_winner(&b).is_none() { @@ -263,3 +284,107 @@ fn main() { } println!("{}", b); } + +// There aren't that many positions per color, so just encode the zobrist hash statically. +const HASHES: [u64; 100] = [ + 0x73399349585d196e, + 0xe512dc15f0da3dd1, + 0x4fbc1b81c6197db2, + 0x16b5034810111a66, + 0xa9a9d0183e33c311, + 0xbb9d7bdea0dad2d6, + 0x089d9205c11ca5c7, + 0x18d9db91aa689617, + 0x1336123120681e34, + 0xc902e6c0bd6ef6bf, + 0x16985ba0916238c1, + 0x6144c3f2ab9f6dc4, + 0xf24b4842de919a02, + 0xdd6dd35ba0c150a1, + 0x369a9de8ec3676e3, + 0x2c366fb99be782d8, + 0x24d3231335c0dbd6, + 0x14048390c56e38f1, + 0x55dfbc820f635186, + 0x0dc98cb87372d5fa, + 0xe3098781582027b4, + 0x088158ec8202adca, + 0x231df62376ad9514, + 0xd3747fad069caeae, + 0x4e4f26cb41d0c620, + 0x06d0e37cd11b8f1c, + 0xed33865175fbbdd2, + 0xf1f52569481f0d8f, + 0xfb6fd5c922e2127c, + 0x6778bb0eba4a6649, + 0xe35b853bdac1210b, + 0x465a67712ec749a2, + 0x83b1fd78e576fe72, + 0xe84827644a5ccbe6, + 0x89095321ce8e4d03, + 0x298c529eecb0ec36, + 0xe9dcc93d77cb49ad, + 0xa7446daa1834c04a, + 0x93f15442b434d550, + 0x7f2a36dbf1cbce3f, + 0x03365a42023b02b3, + 0x101d87e850689cda, + 0x113b31e2760d2050, + 0x9cdb7b7394e1b0ae, + 0xd04530b3b7daf3a3, + 0x717e67aed6b4ffc9, + 0x4ae564a3f3ca8b03, + 0x07c50a4d89351437, + 0x7f3b32175e5f37e0, + 0x6e3599203bb50cd7, + 0xcfe2319d4a6cfa73, + 0xdbc6a398b10f5c3b, + 0x9c1ba28ae655bbd1, + 0x9dc87a426451941a, + 0x691e618354a55cb5, + 0x61b8cabbc575f4ba, + 0x7e6f31f1818593d4, + 0x9fa69e1ef4df8a9b, + 0x5a9dc96c3cb18d8f, + 
0x65c4e9c0f40114f5, + 0x4e66504db2d937cf, + 0x4ebd6d097fe1e256, + 0xfb10983e639af6b1, + 0xcfbed7bd4032a59a, + 0x1f47f6a95049fe4f, + 0xbd461d202b879890, + 0xfc050073b0c74cbe, + 0x2923526a1f7092e9, + 0x0b1d30bb6b960bc7, + 0x632d12e4a9d0229d, + 0x8d4ffd6ab37c6bfd, + 0x561e36b8609b94ec, + 0x32e8482c9e7ed80c, + 0xaf62a119227b1029, + 0x62cb2a585410c311, + 0x7df3aeef90e1a0cb, + 0xe6d5a176f8a1b180, + 0x156e5162d8f2bef8, + 0xee84c58f5ebbe811, + 0xd32a1b4e24038bac, + 0xeaa1dbdbdd7731f7, + 0xedb554afd3d07cc6, + 0xbc789444317d4d05, + 0x0e23ce8f3d581fcd, + 0xacb498d4569249a8, + 0x843fb2519edc9f5a, + 0xe222f0eb79436809, + 0x7a88365f089ae80b, + 0x2a0f08694d7ea84d, + 0x09cad4dbfc990fa2, + 0xfe5f27499de6b4f8, + 0x3d8ed8ab1d44997f, + 0x2af64deca431f644, + 0xf2712b5274180c36, + 0x30eeae3a821bf86c, + 0x31c921831f06ad2f, + 0x40683ff11655cd2f, + 0xb78183a74cd6cb03, + 0xde9e15a6f99bda2f, + 0xa5293988641edb9b, +]; diff --git a/src/interface.rs b/src/interface.rs index a729072..3194cb7 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -61,6 +61,17 @@ impl Winner { } } +/// An optional trait for game state types to support hashing. +/// +/// Strategies that cache things by game state require this. +pub trait Zobrist { + /// Hash of the game position. + /// + /// Expected to be pre-calculated and cheaply updated with each apply or + /// undo. + fn zobrist_hash(&self) -> u64; +} + /// Defines the rules for a two-player, perfect-knowledge game. 
/// /// A game ties together types for the state and moves, generates the possible diff --git a/src/lib.rs b/src/lib.rs index c384f15..ebe7151 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,5 +4,6 @@ pub mod interface; pub mod strategies; pub mod util; -pub use interface::{Evaluation, Evaluator, Game, Move, Strategy, Winner}; +pub use interface::{Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist}; +pub use strategies::iterative::{IterativeOptions, IterativeSearch}; pub use strategies::negamax::{Negamax, Options}; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs new file mode 100644 index 0000000..b239819 --- /dev/null +++ b/src/strategies/iterative.rs @@ -0,0 +1,309 @@ +//! An implementation of iterative search. +//! +//! Search and evaluate at depth 0, then start over at depth 1, then depth 2, +//! etc. Can keep going until a maximum depth or maximum time or either. Uses +//! a transposition table to reuse information from previous iterations. + +use super::super::interface::*; + +use std::cmp::{max, min}; +use std::marker::PhantomData; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread::{sleep, spawn}; +use std::time::{Duration, Instant}; + +fn timeout_signal(dur: Duration) -> Arc { + // Theoretically we could include an async runtime to do this and use + // fewer threads, but the stdlib implementation is only a few lines... + let signal = Arc::new(AtomicBool::new(false)); + let signal2 = signal.clone(); + spawn(move || { + sleep(dur); + signal2.store(true, Ordering::Relaxed); + }); + signal +} + +#[derive(Copy, Clone, Eq, PartialEq)] +enum EntryFlag { + Exact, + Upperbound, + Lowerbound, +} + +// TODO: Optimize size. 
+#[derive(Copy, Clone)] +struct Entry { + hash: u64, + value: Evaluation, + depth: u8, + flag: EntryFlag, + best_move: Option, +} + +/* +impl Default for Entry { + fn default() -> Self { + Entry:: { hash: 0, value: 0, depth: 0, flag: EntryFlag::Exact, best_move: None } + } +}*/ + +struct TranspositionTable { + table: Vec>, + mask: usize, + minimum_depth: u8, +} + +impl TranspositionTable { + fn new(table_byte_size: usize) -> Self { + let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); + let mut table = Vec::with_capacity(size); + for _ in 0..size { + table.push(Entry:: { + hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + best_move: None, + }); + } + //let table = vec![Entry::::default(); size]; + Self { table: table, mask: size - 1, minimum_depth: 1 } + } + + fn lookup(&self, hash: u64) -> Option<&Entry> { + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if hash == entry.hash { + Some(entry) + } else { + None + } + } + + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + let index = (hash as usize) & self.mask; + self.table[index] = + Entry { hash: hash, value: value, depth: depth, flag: flag, best_move: Some(best_move) } + } +} + +/// Options to use for the iterative search engine. 
+#[derive(Clone, Copy)] +pub struct IterativeOptions { + max_depth: usize, + max_time: Duration, + table_byte_size: usize, +} + +impl Default for IterativeOptions { + fn default() -> Self { + IterativeOptions { max_depth: 5, max_time: Duration::new(0, 0), table_byte_size: 100_000 } + } +} + +impl IterativeOptions { + pub fn with_timeout(mut self, dur: Duration) -> Self { + self.max_time = dur; + self + } + + pub fn with_max_depth(mut self, depth: usize) -> Self { + self.max_depth = depth; + self + } + + pub fn with_table_byte_size(mut self, size: usize) -> Self { + self.table_byte_size = size; + self + } +} + +pub struct IterativeSearch { + // These are public so that they can be changed for each move, while + // reusing the table state between runs. + /// The maximum depth to search. + pub max_depth: usize, + /// When non-zero, the maximum time to compute the best move. When the + /// timeout is hit, it returns the best move found of the previous + /// iteration. + pub max_time: Duration, + timeout: Arc, + transposition_table: TranspositionTable<<::G as Game>::M>, + prev_value: Evaluation, + _eval: PhantomData, + + // Runtime stats for the last move generated. + + // Maximum depth used to produce the move. + actual_depth: u8, + // Nodes explored up to this depth. + nodes_explored: usize, + // Nodes explored past this depth, and thus this is thrown away work. + next_depth_nodes: usize, + table_hits: usize, + wall_time: Duration, +} + +impl IterativeSearch { + pub fn new(opts: IterativeOptions) -> IterativeSearch { + let table = TranspositionTable::new(opts.table_byte_size); + IterativeSearch { + max_depth: opts.max_depth, + max_time: opts.max_time, + timeout: Arc::new(AtomicBool::new(false)), + transposition_table: table, + prev_value: 0, + _eval: PhantomData, + actual_depth: 0, + nodes_explored: 0, + next_depth_nodes: 0, + table_hits: 0, + wall_time: Duration::default(), + } + } + + /// Return a human-readable summary of the last move generated. 
+ pub fn stats(&self) -> String { + let throughput = + (self.nodes_explored + self.next_depth_nodes) as f64 / self.wall_time.as_secs_f64(); + format!("Depth {} exploring {} nodes.\nPartial exploration of next depth explored {} nodes.\n{} transposition table hits.\n{:.01} nodes/sec", + self.actual_depth, self.nodes_explored, self.next_depth_nodes, self.table_hits, throughput) + } + + pub fn root_value(&self) -> Evaluation { + self.prev_value + } + + // Recursively compute negamax on the game state. Returns None if it hits the timeout. + fn negamax( + &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, + mut beta: Evaluation, + ) -> Option + where + ::S: Zobrist, + ::M: Copy + Eq, + { + if self.timeout.load(Ordering::Relaxed) { + return None; + } + + self.next_depth_nodes += 1; + + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + if depth == 0 { + return Some(E::evaluate(s)); + } + + let alpha_orig = alpha; + let hash = s.zobrist_hash(); + // TODO: encapsulate in table func + let mut good_move = None; + if depth >= self.transposition_table.minimum_depth { + if let Some(entry) = self.transposition_table.lookup(hash) { + good_move = entry.best_move; + self.table_hits += 1; + if entry.depth >= depth { + match entry.flag { + EntryFlag::Exact => { + return Some(entry.value); + } + EntryFlag::Lowerbound => { + alpha = max(alpha, entry.value); + } + EntryFlag::Upperbound => { + beta = min(beta, entry.value); + } + } + if alpha >= beta { + return Some(entry.value); + } + } + } + } + + let mut moves = [None; 200]; + let n = E::G::generate_moves(s, &mut moves); + // Rearrange so predicted good move is first. 
+ for i in 0..n { + if moves[i] == good_move { + moves.swap(0, i); + break; + } + } + + let mut best = WORST_EVAL; + let mut best_move = moves[0].unwrap(); + for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { + m.apply(s); + let value = -self.negamax(s, depth - 1, -beta, -alpha)?; + m.undo(s); + if value > best { + best = value; + best_move = m; + } + alpha = max(alpha, value); + if alpha >= beta { + break; + } + } + + // TODO: encapsulate in table func + if depth >= self.transposition_table.minimum_depth { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.transposition_table.store(hash, best, depth, flag, best_move); + } + + Some(best) + } +} + +impl Strategy for IterativeSearch +where + ::S: Clone + Zobrist, + ::M: Copy + Eq, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + // Reset stats. + self.nodes_explored = 0; + self.next_depth_nodes = 0; + self.actual_depth = 0; + self.table_hits = 0; + let start_time = Instant::now(); + // Start timer if configured. + self.timeout = if self.max_time == Duration::new(0, 0) { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(self.max_time) + }; + + let root_hash = s.zobrist_hash(); + let mut s_clone = s.clone(); + let mut best_move = None; + + for depth in 0..=self.max_depth as u8 { + if self.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { + // Timeout. Return the best move from the previous depth. 
+ break; + } + let entry = self.transposition_table.lookup(root_hash).unwrap(); + best_move = entry.best_move; + + self.actual_depth = max(self.actual_depth, depth); + self.nodes_explored += self.next_depth_nodes; + self.prev_value = entry.value; + self.next_depth_nodes = 0; + } + self.wall_time = start_time.elapsed(); + best_move + } +} diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 8855f06..522dc39 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -1,4 +1,5 @@ //! Strategy implementations. +pub mod iterative; pub mod negamax; pub mod random; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 5ec807a..08f4ee7 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -1,8 +1,7 @@ //! An implementation of Negamax. //! -//! Currently, only the basic alpha-pruning variant is implemented. Further work -//! could add advanced features, like history and/or transposition tables. This -//! picks randomly among the "best" moves, so that it's non-deterministic. +//! With only the basic alpha-pruning implemented. This picks randomly among +//! the "best" moves, so that it's non-deterministic. use super::super::interface::*; use rand; diff --git a/tests/strategies.rs b/tests/strategies.rs index 89d75fe..b29649e 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -86,7 +86,8 @@ impl minimax::Evaluator for RandomEvaluator { .wrapping_add(b.all_pieces()) .wrapping_mul(0x18d9db91aa689617); hash = hash.wrapping_add(hash >> 31); - hash as minimax::Evaluation + // Use fewer bits so that we get some equal values. 
+ (hash as minimax::Evaluation) >> 25 } } @@ -107,7 +108,7 @@ fn generate_random_state(depth: usize) -> connect4::Board { } #[test] -fn compare_strategies() { +fn compare_plain_negamax() { for _ in 0..10 { for max_depth in 0..5 { let b = generate_random_state(10); @@ -121,6 +122,15 @@ fn compare_strategies() { negamax.choose_move(&b); let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); + + let mut iterative = minimax::IterativeSearch::::new( + minimax::IterativeOptions::default() + .with_table_byte_size(64000) + .with_max_depth(max_depth), + ); + iterative.choose_move(&b); + let iterative_value = iterative.root_value(); + assert_eq!(value, iterative_value, "search depth={}\n{}", max_depth, b); } } } From d258cad5ddaebfdf4a5400d42836aa89e3a4ab5e Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 15 Feb 2021 10:19:00 -0800 Subject: [PATCH 014/162] Add benchmarks based on connect four comparing negamax and iterative strategies. 
--- benches/negamax.rs | 65 ++++++++++++++------------------------------ examples/connect4.rs | 2 +- 2 files changed, 21 insertions(+), 46 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index 00b1ca8..4386ab6 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -1,56 +1,31 @@ #[macro_use] extern crate bencher; extern crate minimax; +#[path = "../examples/connect4.rs"] +mod connect4; + use bencher::Bencher; use minimax::*; -#[derive(Clone)] -pub struct Board; - -#[derive(Copy, Clone)] -pub struct Place; - -pub struct Eval; - -pub struct Noop; - -impl Move for Place { - type G = Noop; - fn apply(&self, _: &mut Board) {} - fn undo(&self, _: &mut Board) {} -} - -impl Game for Noop { - type S = Board; - type M = Place; - - fn generate_moves(_: &Board, ms: &mut [Option]) -> usize { - const NUM_MOVES: usize = 4; - for m in ms.iter_mut().take(NUM_MOVES) { - *m = Some(Place); - } - ms[NUM_MOVES] = None; - NUM_MOVES - } - - fn get_winner(_: &Board) -> Option { - None - } -} - -impl Evaluator for Eval { - type G = Noop; - - fn evaluate(_: &Board) -> Evaluation { - 0 - } +fn bench_negamax(b: &mut Bencher) { + let board = connect4::Board::default(); + b.iter(|| { + let mut s = Negamax::::new(Options { max_depth: 5 }); + let m = s.choose_move(&board); + assert!(m.is_some()); + }); } -fn bench_negamax(b: &mut Bencher) { - let board = Board; - let mut s = Negamax::::new(Options { max_depth: 10 }); - b.iter(|| s.choose_move(&board)); +fn bench_iterative(b: &mut Bencher) { + let board = connect4::Board::default(); + b.iter(|| { + let mut s = IterativeSearch::::new( + IterativeOptions::default().with_table_byte_size(128_000).with_max_depth(5), + ); + let m = s.choose_move(&board); + assert!(m.is_some()); + }); } -benchmark_group!(benches, bench_negamax); +benchmark_group!(benches, bench_negamax, bench_iterative); benchmark_main!(benches); diff --git a/examples/connect4.rs b/examples/connect4.rs index 63cee19..8fd8463 100644 --- a/examples/connect4.rs +++ 
b/examples/connect4.rs @@ -1,4 +1,5 @@ //! A definition of the game Connect Four using the library, for use in tests and benchmarks. +#![allow(dead_code)] extern crate minimax; @@ -254,7 +255,6 @@ impl minimax::Evaluator for BasicEvaluator { } } -#[cfg(not(test))] fn main() { use minimax::{Game, Move, Strategy}; use minimax::{IterativeOptions, IterativeSearch, Negamax, Options}; From dc34591a4183e94c8f03e519484d7ff633a65f5f Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 15 Feb 2021 21:17:56 -0800 Subject: [PATCH 015/162] Simplify Negamax config by removing the Options struct. Negamax isn't going to grow more options. It will remain the basic Strategy with the least trait requirements of a game. --- benches/negamax.rs | 2 +- examples/connect4.rs | 4 ++-- examples/ttt.rs | 6 +++--- src/lib.rs | 2 +- src/strategies/negamax.rs | 16 +++++----------- tests/strategies.rs | 2 +- tests/ttt.rs | 9 ++++----- 7 files changed, 17 insertions(+), 24 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index 4386ab6..93b301f 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -10,7 +10,7 @@ use minimax::*; fn bench_negamax(b: &mut Bencher) { let board = connect4::Board::default(); b.iter(|| { - let mut s = Negamax::::new(Options { max_depth: 5 }); + let mut s = Negamax::::with_max_depth(5); let m = s.choose_move(&board); assert!(m.is_some()); }); diff --git a/examples/connect4.rs b/examples/connect4.rs index 8fd8463..53c696c 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -257,7 +257,7 @@ impl minimax::Evaluator for BasicEvaluator { fn main() { use minimax::{Game, Move, Strategy}; - use minimax::{IterativeOptions, IterativeSearch, Negamax, Options}; + use minimax::{IterativeOptions, IterativeSearch, Negamax}; let mut b = Board::default(); let opts = IterativeOptions::default() @@ -265,7 +265,7 @@ fn main() { .with_timeout(Duration::from_secs(1)) .with_max_depth(20); let mut strategies: [&mut dyn Strategy; 2] = [ - &mut 
Negamax::::new(Options { max_depth: 8 }), + &mut Negamax::::with_max_depth(8), &mut IterativeSearch::::new(opts), ]; let mut s = 0; diff --git a/examples/ttt.rs b/examples/ttt.rs index feea5c2..a70a699 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -236,13 +236,13 @@ impl minimax::Evaluator for Evaluator { } fn main() { - use minimax::strategies::negamax::{Negamax, Options}; + use minimax::strategies::negamax::Negamax; use minimax::{Game, Move, Strategy}; let mut b = Board::default(); let mut strategies = vec![ - Negamax::::new(Options { max_depth: 10 }), - Negamax::::new(Options { max_depth: 10 }), + Negamax::::with_max_depth(10), + Negamax::::with_max_depth(10), ]; let mut s = 0; while self::Game::get_winner(&b).is_none() { diff --git a/src/lib.rs b/src/lib.rs index ebe7151..456ad8a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,4 +6,4 @@ pub mod util; pub use interface::{Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist}; pub use strategies::iterative::{IterativeOptions, IterativeSearch}; -pub use strategies::negamax::{Negamax, Options}; +pub use strategies::negamax::Negamax; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 08f4ee7..199256d 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -50,25 +50,19 @@ where degrade_wins(best) } -/// Options to use for the `Negamax` engine. -pub struct Options { - /// The maximum depth within the game tree. - pub max_depth: usize, -} - pub struct Negamax { - opts: Options, + max_depth: usize, rng: rand::ThreadRng, prev_value: Evaluation, _eval: PhantomData, } impl Negamax { - pub fn new(opts: Options) -> Negamax { - Negamax { opts: opts, rng: rand::thread_rng(), prev_value: 0, _eval: PhantomData } + pub fn with_max_depth(depth: usize) -> Negamax { + Negamax { max_depth: depth, rng: rand::thread_rng(), prev_value: 0, _eval: PhantomData } } - // Return the value computed for the root node for the last computation. 
+ /// Return the value computed for the root node for the last computation. pub fn root_value(&self) -> Evaluation { // Undo any value clamping. if self.prev_value > BEST_EVAL - 100 { @@ -99,7 +93,7 @@ where for m in moves.iter().take_while(|m| m.is_some()).map(|m| m.unwrap()) { // determine value for this move m.apply(&mut s_clone); - let value = -negamax::(&mut s_clone, self.opts.max_depth, WORST_EVAL, -best); + let value = -negamax::(&mut s_clone, self.max_depth, WORST_EVAL, -best); m.undo(&mut s_clone); // Strictly better than any move found so far. if value > best { diff --git a/tests/strategies.rs b/tests/strategies.rs index b29649e..27135cc 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -118,7 +118,7 @@ fn compare_plain_negamax() { let value = plain_negamax.root_value; let mut negamax = - minimax::Negamax::::new(minimax::Options { max_depth: max_depth }); + minimax::Negamax::::with_max_depth(max_depth); negamax.choose_move(&b); let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); diff --git a/tests/ttt.rs b/tests/ttt.rs index 60953f4..8bc2a6d 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -4,13 +4,13 @@ extern crate minimax; mod ttt; use minimax::util::battle_royale; +use minimax::Negamax; // Ensure that two players using negamax always results in a draw. #[test] fn test_ttt_negamax_always_draws() { - use minimax::strategies::negamax::{Negamax, Options}; - let mut s1 = Negamax::::new(Options { max_depth: 10 }); - let mut s2 = Negamax::::new(Options { max_depth: 10 }); + let mut s1 = Negamax::::with_max_depth(10); + let mut s2 = Negamax::::with_max_depth(10); for _ in 0..100 { assert_eq!(battle_royale(&mut s1, &mut s2), None); } @@ -20,9 +20,8 @@ fn test_ttt_negamax_always_draws() { // either a draw or a win for the former player. 
#[test] fn test_ttt_negamax_vs_random_always_wins_or_draws() { - use minimax::strategies::negamax::{Negamax, Options}; use minimax::strategies::random::Random; - let mut s1 = Negamax::::new(Options { max_depth: 10 }); + let mut s1 = Negamax::::with_max_depth(10); let mut s2 = Random::new(); for _ in 0..100 { assert_ne!(battle_royale(&mut s1, &mut s2), Some(1)); From 339bab27dd0d061850ff3f0064fc5832055b290c Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 15 Feb 2021 21:49:31 -0800 Subject: [PATCH 016/162] Simplify IterativeSearch options. Take the mutable options out of the initial configuration, and prohibit setting max depth and max time at the same time. --- benches/negamax.rs | 3 +- examples/connect4.rs | 13 +++----- examples/ttt.rs | 6 ++-- src/strategies/iterative.rs | 64 ++++++++++++++++--------------------- tests/strategies.rs | 8 ++--- 5 files changed, 40 insertions(+), 54 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index 93b301f..8285bd3 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -20,8 +20,9 @@ fn bench_iterative(b: &mut Bencher) { let board = connect4::Board::default(); b.iter(|| { let mut s = IterativeSearch::::new( - IterativeOptions::default().with_table_byte_size(128_000).with_max_depth(5), + IterativeOptions::new().with_table_byte_size(128_000), ); + s.set_max_depth(5); let m = s.choose_move(&board); assert!(m.is_some()); }); diff --git a/examples/connect4.rs b/examples/connect4.rs index 53c696c..9339f96 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -260,14 +260,11 @@ fn main() { use minimax::{IterativeOptions, IterativeSearch, Negamax}; let mut b = Board::default(); - let opts = IterativeOptions::default() - .with_table_byte_size(1_000_000) - .with_timeout(Duration::from_secs(1)) - .with_max_depth(20); - let mut strategies: [&mut dyn Strategy; 2] = [ - &mut Negamax::::with_max_depth(8), - &mut IterativeSearch::::new(opts), - ]; + let opts = 
IterativeOptions::new().with_table_byte_size(1_000_000); + let mut iterative = IterativeSearch::::new(opts); + iterative.set_timeout(Duration::from_secs(1)); + let mut strategies: [&mut dyn Strategy; 2] = + [&mut Negamax::::with_max_depth(8), &mut iterative]; let mut s = 0; while self::Game::get_winner(&b).is_none() { println!("{}", b); diff --git a/examples/ttt.rs b/examples/ttt.rs index a70a699..564aef8 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -240,10 +240,8 @@ fn main() { use minimax::{Game, Move, Strategy}; let mut b = Board::default(); - let mut strategies = vec![ - Negamax::::with_max_depth(10), - Negamax::::with_max_depth(10), - ]; + let mut strategies = + vec![Negamax::::with_max_depth(10), Negamax::::with_max_depth(10)]; let mut s = 0; while self::Game::get_winner(&b).is_none() { println!("{}", b); diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index b239819..e57b985 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -42,13 +42,6 @@ struct Entry { best_move: Option, } -/* -impl Default for Entry { - fn default() -> Self { - Entry:: { hash: 0, value: 0, depth: 0, flag: EntryFlag::Exact, best_move: None } - } -}*/ - struct TranspositionTable { table: Vec>, mask: usize, @@ -68,7 +61,6 @@ impl TranspositionTable { best_move: None, }); } - //let table = vec![Entry::::default(); size]; Self { table: table, mask: size - 1, minimum_depth: 1 } } @@ -92,28 +84,18 @@ impl TranspositionTable { /// Options to use for the iterative search engine. 
#[derive(Clone, Copy)] pub struct IterativeOptions { - max_depth: usize, - max_time: Duration, table_byte_size: usize, + // TODO: support more configuration of replacement strategy + // https://www.chessprogramming.org/Transposition_Table#Replacement_Strategies } -impl Default for IterativeOptions { - fn default() -> Self { - IterativeOptions { max_depth: 5, max_time: Duration::new(0, 0), table_byte_size: 100_000 } +impl IterativeOptions { + pub fn new() -> Self { + IterativeOptions { table_byte_size: 1_000_000 } } } impl IterativeOptions { - pub fn with_timeout(mut self, dur: Duration) -> Self { - self.max_time = dur; - self - } - - pub fn with_max_depth(mut self, depth: usize) -> Self { - self.max_depth = depth; - self - } - pub fn with_table_byte_size(mut self, size: usize) -> Self { self.table_byte_size = size; self @@ -121,14 +103,8 @@ impl IterativeOptions { } pub struct IterativeSearch { - // These are public so that they can be changed for each move, while - // reusing the table state between runs. - /// The maximum depth to search. - pub max_depth: usize, - /// When non-zero, the maximum time to compute the best move. When the - /// timeout is hit, it returns the best move found of the previous - /// iteration. - pub max_time: Duration, + max_depth: usize, + max_time: Duration, timeout: Arc, transposition_table: TranspositionTable<<::G as Game>::M>, prev_value: Evaluation, @@ -150,8 +126,8 @@ impl IterativeSearch { pub fn new(opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size); IterativeSearch { - max_depth: opts.max_depth, - max_time: opts.max_time, + max_depth: 100, + max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), transposition_table: table, prev_value: 0, @@ -164,14 +140,30 @@ impl IterativeSearch { } } - /// Return a human-readable summary of the last move generated. + /// Set the maximum depth to search. Disables the timeout. 
+ /// This can be changed between moves while reusing the transposition table. + pub fn set_max_depth(&mut self, depth: usize) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + /// The maximum time to compute the best move. When the timeout is hit, it + /// returns the best move found of the previous full iteration. Unlimited + /// max depth. + pub fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 100; + } + + /// Return a human-readable summary of the last move generation. pub fn stats(&self) -> String { let throughput = (self.nodes_explored + self.next_depth_nodes) as f64 / self.wall_time.as_secs_f64(); - format!("Depth {} exploring {} nodes.\nPartial exploration of next depth explored {} nodes.\n{} transposition table hits.\n{:.01} nodes/sec", - self.actual_depth, self.nodes_explored, self.next_depth_nodes, self.table_hits, throughput) + format!("Explored {} nodes to depth {}.\nInterrupted exploration of next depth explored {} nodes.\n{} transposition table hits.\n{} nodes/sec", + self.nodes_explored, self.actual_depth, self.next_depth_nodes, self.table_hits, throughput as usize) } + /// Return the value computed for the root node for the last computation. 
pub fn root_value(&self) -> Evaluation { self.prev_value } diff --git a/tests/strategies.rs b/tests/strategies.rs index 27135cc..a3b3630 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -117,17 +117,15 @@ fn compare_plain_negamax() { plain_negamax.choose_move(&b); let value = plain_negamax.root_value; - let mut negamax = - minimax::Negamax::::with_max_depth(max_depth); + let mut negamax = minimax::Negamax::::with_max_depth(max_depth); negamax.choose_move(&b); let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); let mut iterative = minimax::IterativeSearch::::new( - minimax::IterativeOptions::default() - .with_table_byte_size(64000) - .with_max_depth(max_depth), + minimax::IterativeOptions::new().with_table_byte_size(64000), ); + iterative.set_max_depth(max_depth); iterative.choose_move(&b); let iterative_value = iterative.root_value(); assert_eq!(value, iterative_value, "search depth={}\n{}", max_depth, b); From 74edfcc08ad9000c5e592d968c3860ac90509b96 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 Feb 2021 13:56:02 -0800 Subject: [PATCH 017/162] Encapsulate replacement strategy in TranspositionTable. 
--- src/strategies/iterative.rs | 60 +++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index e57b985..e9f9c8c 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -75,9 +75,11 @@ impl TranspositionTable { } fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { - let index = (hash as usize) & self.mask; - self.table[index] = - Entry { hash: hash, value: value, depth: depth, flag: flag, best_move: Some(best_move) } + if depth >= self.minimum_depth { + let index = (hash as usize) & self.mask; + self.table[index] = + Entry { hash: hash, value: value, depth: depth, flag: flag, best_move: Some(best_move) } + } } } @@ -192,27 +194,24 @@ impl IterativeSearch { let alpha_orig = alpha; let hash = s.zobrist_hash(); - // TODO: encapsulate in table func let mut good_move = None; - if depth >= self.transposition_table.minimum_depth { - if let Some(entry) = self.transposition_table.lookup(hash) { - good_move = entry.best_move; - self.table_hits += 1; - if entry.depth >= depth { - match entry.flag { - EntryFlag::Exact => { - return Some(entry.value); - } - EntryFlag::Lowerbound => { - alpha = max(alpha, entry.value); - } - EntryFlag::Upperbound => { - beta = min(beta, entry.value); - } - } - if alpha >= beta { + if let Some(entry) = self.transposition_table.lookup(hash) { + good_move = entry.best_move; + self.table_hits += 1; + if entry.depth >= depth { + match entry.flag { + EntryFlag::Exact => { return Some(entry.value); } + EntryFlag::Lowerbound => { + alpha = max(alpha, entry.value); + } + EntryFlag::Upperbound => { + beta = min(beta, entry.value); + } + } + if alpha >= beta { + return Some(entry.value); } } } @@ -243,17 +242,14 @@ impl IterativeSearch { } } - // TODO: encapsulate in table func - if depth >= self.transposition_table.minimum_depth { - let flag = if best <= alpha_orig { - 
EntryFlag::Upperbound - } else if best >= beta { - EntryFlag::Lowerbound - } else { - EntryFlag::Exact - }; - self.transposition_table.store(hash, best, depth, flag, best_move); - } + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.transposition_table.store(hash, best, depth, flag, best_move); Some(best) } From e895e84f6e398b63e60bf9d8927cf29fb3ba7f9a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 Feb 2021 14:34:25 -0800 Subject: [PATCH 018/162] Refactor connect4 to twiddle colors less. --- examples/connect4.rs | 63 +++++++++++++++++-------------------- src/strategies/iterative.rs | 11 +++++-- tests/strategies.rs | 4 +-- 3 files changed, 38 insertions(+), 40 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 9339f96..d307c52 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -19,9 +19,8 @@ pub struct Board { * 1 8 15 22 29 36 43 * 0 7 14 21 28 35 42 */ - red_pieces: u64, - yellow_pieces: u64, - reds_move: bool, + all_pieces: u64, + pub pieces_to_move: u64, num_moves: u8, hash: u64, } @@ -32,16 +31,12 @@ const HEIGHT: u32 = NUM_ROWS + 1; const COL_MASK: u64 = (1 << NUM_ROWS) - 1; impl Board { - pub fn all_pieces(&self) -> u64 { - self.red_pieces | self.yellow_pieces + fn reds_move(&self) -> bool { + self.num_moves & 1 == 0 } - pub fn pieces_to_move(&self) -> u64 { - if self.reds_move { - self.red_pieces - } else { - self.yellow_pieces - } + pub fn pieces_just_moved(&self) -> u64 { + self.all_pieces ^ self.pieces_to_move } fn update_hash(&mut self, piece: u64) { @@ -54,7 +49,7 @@ impl Board { impl Default for Board { fn default() -> Board { - Board { red_pieces: 0, yellow_pieces: 0, reds_move: true, num_moves: 0, hash: 0 } + Board { all_pieces: 0, pieces_to_move: 0, num_moves: 0, hash: 0 } } } @@ -66,14 +61,18 @@ impl minimax::Zobrist for Board { impl Display for Board { fn fmt(&self, f: &mut Formatter) -> 
Result { + let red_pieces = + if self.reds_move() { self.pieces_to_move } else { self.pieces_just_moved() }; + let yellow_pieces = + if self.reds_move() { self.pieces_just_moved() } else { self.pieces_to_move }; for row in (0..6).rev() { for col in 0..7 { write!( f, "{}", - if self.red_pieces >> (row + col * HEIGHT) & 1 != 0 { + if red_pieces >> (row + col * HEIGHT) & 1 != 0 { '\u{1F534}' - } else if self.yellow_pieces >> (row + col * HEIGHT) & 1 != 0 { + } else if yellow_pieces >> (row + col * HEIGHT) & 1 != 0 { '\u{1F7E1}' } else { '\u{25ef}' @@ -100,27 +99,21 @@ impl Place { impl minimax::Move for Place { type G = Game; fn apply(&self, b: &mut Board) { - let col = (b.all_pieces() >> self.col_shift()) & COL_MASK; + let col = (b.all_pieces >> self.col_shift()) & COL_MASK; let new_piece = (col + 1) << self.col_shift(); - if b.reds_move { - b.red_pieces |= new_piece; - } else { - b.yellow_pieces |= new_piece; - } - b.reds_move = !b.reds_move; + // Swap colors + b.pieces_to_move ^= b.all_pieces; + b.all_pieces |= new_piece; b.num_moves += 1; b.update_hash(new_piece); } fn undo(&self, b: &mut Board) { - let col = (b.all_pieces() >> self.col_shift()) & COL_MASK; + let col = (b.all_pieces >> self.col_shift()) & COL_MASK; let prev_piece = (col ^ (col >> 1)) << self.col_shift(); - b.reds_move = !b.reds_move; - if b.reds_move { - b.red_pieces &= !prev_piece; - } else { - b.yellow_pieces &= !prev_piece; - } + b.all_pieces &= !prev_piece; + // Swap colors + b.pieces_to_move ^= b.all_pieces; b.update_hash(prev_piece); b.num_moves -= 1; } @@ -134,7 +127,7 @@ impl minimax::Game for Game { fn generate_moves(b: &Board, moves: &mut [Option]) -> usize { let mut n = 0; - let mut cols = b.all_pieces(); + let mut cols = b.all_pieces; for i in 0..NUM_COLS { if cols & COL_MASK < COL_MASK { moves[n] = Some(Place { col: i as u8 }); @@ -148,7 +141,7 @@ impl minimax::Game for Game { fn get_winner(b: &Board) -> Option { // Position of pieces for the player that just moved. 
- let pieces = if b.reds_move { b.yellow_pieces } else { b.red_pieces }; + let pieces = b.pieces_just_moved(); // Detect pairs of two pieces in a row, then pairs of two pairs in a // row. @@ -182,7 +175,7 @@ impl minimax::Evaluator for DumbEvaluator { impl Board { // Return bitmap of all open locations that would complete a four in a row for the given player. fn find_fourth_moves(&self, pieces: u64) -> u64 { - let mut all = self.all_pieces(); + let mut all = self.all_pieces; // Mark the fake row on top as full to prevent wrapping around. let mut top_row = COL_MASK + 1; for _ in 0..NUM_COLS { @@ -212,8 +205,8 @@ pub struct BasicEvaluator; impl minimax::Evaluator for BasicEvaluator { type G = Game; fn evaluate(b: &Board) -> minimax::Evaluation { - let player_pieces = if b.reds_move { b.red_pieces } else { b.yellow_pieces }; - let opponent_pieces = if b.reds_move { b.yellow_pieces } else { b.red_pieces }; + let player_pieces = b.pieces_to_move; + let opponent_pieces = b.pieces_just_moved(); let mut player_wins = b.find_fourth_moves(player_pieces); let mut opponent_wins = b.find_fourth_moves(opponent_pieces); @@ -226,7 +219,7 @@ impl minimax::Evaluator for BasicEvaluator { // Count columns that cause immediate win. // Count columns that then allow immediate win. 
- let mut all = b.all_pieces(); + let mut all = b.all_pieces; for _ in 0..NUM_COLS { let next_move = (all & COL_MASK) + 1; if next_move > COL_MASK { @@ -271,7 +264,7 @@ fn main() { let ref mut strategy = strategies[s]; match strategy.choose_move(&mut b) { Some(m) => { - let color = if b.reds_move { "Red" } else { "Yellow" }; + let color = if b.reds_move() { "Red" } else { "Yellow" }; println!("{} piece in column {}", color, m.col + 1); m.apply(&mut b) } diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index e9f9c8c..0d41d4f 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -77,9 +77,14 @@ impl TranspositionTable { fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { if depth >= self.minimum_depth { let index = (hash as usize) & self.mask; - self.table[index] = - Entry { hash: hash, value: value, depth: depth, flag: flag, best_move: Some(best_move) } - } + self.table[index] = Entry { + hash: hash, + value: value, + depth: depth, + flag: flag, + best_move: Some(best_move), + } + } } } diff --git a/tests/strategies.rs b/tests/strategies.rs index a3b3630..8744193 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -80,10 +80,10 @@ impl minimax::Evaluator for RandomEvaluator { type G = connect4::Game; fn evaluate(b: &connect4::Board) -> minimax::Evaluation { // Scramble the game state to get a deterministically random Evaluation. - let mut hash = b.pieces_to_move().wrapping_mul(0xe512dc15f0da3dd1); + let mut hash = b.pieces_just_moved().wrapping_mul(0xe512dc15f0da3dd1); hash = hash .wrapping_add(hash >> 33) - .wrapping_add(b.all_pieces()) + .wrapping_add(b.pieces_to_move) .wrapping_mul(0x18d9db91aa689617); hash = hash.wrapping_add(hash >> 31); // Use fewer bits so that we get some equal values. 
From fed0f39ec4f6e930bb68582716b798d719ef2add Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 Feb 2021 15:45:49 -0800 Subject: [PATCH 019/162] Update documentation for 0.1.0 release. --- Cargo.toml | 8 ++++---- README.md | 16 ++++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e22a435..44e228d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "minimax" -version = "0.0.2" -authors = [ "Samuel Fredrickson " ] +version = "0.1.0" +authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." -documentation = "http://kinghajj.github.io/doc/minimax/" -repository = "http://github.com/kinghajj/minimax-rs.git" +documentation = "https://docs.rs/minimax" +repository = "https://github.com/edre/minimax-rs" readme = "README.md" keywords = ["ai", "game", "minimax", "negamax"] license = "MIT" diff --git a/README.md b/README.md index 79ce1bf..2146793 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # minimax-rs - Generic implementations of Minimax in Rust. -[![Build Status](https://travis-ci.org/kinghajj/minimax-rs.svg?branch=master)](https://travis-ci.org/kinghajj/minimax-rs) [![Crates.io](https://img.shields.io/crates/v/minimax.svg)](https://crates.io/crates/minimax) +[![Build Status](https://api.travis-ci.com/edre/minimax-rs.svg?branch=master)](https://travis-ci.com/github/edre/minimax-rs) +[![Crates.io](https://img.shields.io/crates/v/minimax.svg)](https://crates.io/crates/minimax) +[![Documentation](https://docs.rs/minimax/badge.svg)](https://docs.rs/minimax) ## About @@ -10,14 +12,16 @@ This library provides interfaces that describe: 2. methods of evaluating particular game states for a player; and 3. strategies for choosing moves for a player. -The eventual goal is to have multiple proper strategies, so that any combination -of evaluators and strategies can be tested against each other. 
Currently, only -a basic alpha-beta pruning Negamax strategy is implemented. +The eventual goal is to have multiple proper strategies, so that any +combination of evaluators and strategies can be tested against each +other. Currently, single-threaded algorithms using alpha-beta pruning, +iterative deeping, and transposition tables are provided. ## Example -The `ttt` module contains an implementation of Tic-Tac-Toe, demonstrating how to -use the game and evaluation interfaces. `test` shows how to use strategies. +The `ttt` and `connect4` modules contain implementations of Tic-Tac-Toe and +Connect Four, demonstrating how to use the game and evaluation interfaces. +`test` shows how to use strategies. ## License From 3854eda6d79f2c6801350ceb14d6b0f16f62d916 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 Feb 2021 16:24:24 -0800 Subject: [PATCH 020/162] Tidy some doc comments. --- src/interface.rs | 2 +- src/strategies/iterative.rs | 9 +++++---- src/strategies/negamax.rs | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index 3194cb7..ae70888 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -1,6 +1,6 @@ //! The common structures and traits. -/// An assessment of a game state from the perspective of the player about to move. +/// An assessment of a game state from the perspective of the player whose turn it is to play. /// Higher values mean a more favorable state. /// A draw is defined as a score of zero. pub type Evaluation = i32; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 0d41d4f..3976b59 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -1,4 +1,4 @@ -//! An implementation of iterative search. +//! An implementation of iterative deepening evaluation. //! //! Search and evaluate at depth 0, then start over at depth 1, then depth 2, //! etc. Can keep going until a maximum depth or maximum time or either. 
Uses @@ -103,6 +103,7 @@ impl IterativeOptions { } impl IterativeOptions { + /// Approximately how large the transposition table should be in memory. pub fn with_table_byte_size(mut self, size: usize) -> Self { self.table_byte_size = size; self @@ -154,9 +155,9 @@ impl IterativeSearch { self.max_time = Duration::new(0, 0); } - /// The maximum time to compute the best move. When the timeout is hit, it - /// returns the best move found of the previous full iteration. Unlimited - /// max depth. + /// Set the maximum time to compute the best move. When the timeout is + /// hit, it returns the best move found of the previous full + /// iteration. Unlimited max depth. pub fn set_timeout(&mut self, max_time: Duration) { self.max_time = max_time; self.max_depth = 100; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 199256d..2399d70 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -62,7 +62,6 @@ impl Negamax { Negamax { max_depth: depth, rng: rand::thread_rng(), prev_value: 0, _eval: PhantomData } } - /// Return the value computed for the root node for the last computation. pub fn root_value(&self) -> Evaluation { // Undo any value clamping. if self.prev_value > BEST_EVAL - 100 { From 77803cfccb7cd0b6411997e698a4c53b35ba58b9 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 Feb 2021 23:21:15 -0800 Subject: [PATCH 021/162] Hide pub testing functions. --- src/strategies/iterative.rs | 2 +- src/strategies/negamax.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 3976b59..2aac995 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -171,7 +171,7 @@ impl IterativeSearch { self.nodes_explored, self.actual_depth, self.next_depth_nodes, self.table_hits, throughput as usize) } - /// Return the value computed for the root node for the last computation. 
+ #[doc(hidden)] pub fn root_value(&self) -> Evaluation { self.prev_value } diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 2399d70..2885d87 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -62,6 +62,7 @@ impl Negamax { Negamax { max_depth: depth, rng: rand::thread_rng(), prev_value: 0, _eval: PhantomData } } + #[doc(hidden)] pub fn root_value(&self) -> Evaluation { // Undo any value clamping. if self.prev_value > BEST_EVAL - 100 { From d3744a4303fbd08ede2c29554519944bfae3c6e6 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 17 Feb 2021 21:45:25 -0800 Subject: [PATCH 022/162] Add configuration for transposition table replacement strategy. --- benches/negamax.rs | 4 +- examples/connect4.rs | 21 ++++++++++- src/lib.rs | 2 +- src/strategies/iterative.rs | 73 ++++++++++++++++++++++++++++++++----- tests/strategies.rs | 26 +++++++++---- 5 files changed, 105 insertions(+), 21 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index 8285bd3..e28fc8f 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -20,7 +20,9 @@ fn bench_iterative(b: &mut Bencher) { let board = connect4::Board::default(); b.iter(|| { let mut s = IterativeSearch::::new( - IterativeOptions::new().with_table_byte_size(128_000), + IterativeOptions::new() + .with_table_byte_size(32_000) + .with_replacement_strategy(ReplacementStrategy::TwoTier), ); s.set_max_depth(5); let m = s.choose_move(&board); diff --git a/examples/connect4.rs b/examples/connect4.rs index d307c52..80e5660 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -5,7 +5,7 @@ extern crate minimax; use std::default::Default; use std::fmt::{Display, Formatter, Result}; -use std::time::Duration; +use std::time::{Duration, Instant}; #[derive(Clone)] pub struct Board { @@ -248,12 +248,29 @@ impl minimax::Evaluator for BasicEvaluator { } } +fn game_benchmark(strategy: &mut dyn minimax::Strategy) -> Duration { + // To not depend on deterministic moves, 
maybe trace the moves of a fixed game. + use minimax::{Game, Move}; + + let start = Instant::now(); + let mut b = Board::default(); + while self::Game::get_winner(&b).is_none() { + match strategy.choose_move(&mut b) { + Some(m) => m.apply(&mut b), + None => break, + } + } + start.elapsed() +} + fn main() { use minimax::{Game, Move, Strategy}; use minimax::{IterativeOptions, IterativeSearch, Negamax}; let mut b = Board::default(); - let opts = IterativeOptions::new().with_table_byte_size(1_000_000); + let opts = IterativeOptions::new() + .with_table_byte_size(1_000_000) + .with_replacement_strategy(minimax::Replacement::DepthPreferred); let mut iterative = IterativeSearch::::new(opts); iterative.set_timeout(Duration::from_secs(1)); let mut strategies: [&mut dyn Strategy; 2] = diff --git a/src/lib.rs b/src/lib.rs index 456ad8a..ba991dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,5 +5,5 @@ pub mod strategies; pub mod util; pub use interface::{Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist}; -pub use strategies::iterative::{IterativeOptions, IterativeSearch}; +pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; pub use strategies::negamax::Negamax; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 2aac995..c4f3e3f 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -25,6 +25,15 @@ fn timeout_signal(dur: Duration) -> Arc { signal } +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +/// Strategies for when to overwrite entries in the transition table. +pub enum Replacement { + Always, + DepthPreferred, + TwoTier, + // TODO: Bucket(size) +} + #[derive(Copy, Clone, Eq, PartialEq)] enum EntryFlag { Exact, @@ -32,25 +41,30 @@ enum EntryFlag { Lowerbound, } -// TODO: Optimize size. +// TODO: Optimize size. Ideally 16 bytes or less. 
#[derive(Copy, Clone)] struct Entry { hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, + generation: u8, best_move: Option, } struct TranspositionTable { table: Vec>, mask: usize, - minimum_depth: u8, + // Incremented for each iterative deepening run. + // Values from old generations are always overwritten. + generation: u8, + strategy: Replacement, } impl TranspositionTable { - fn new(table_byte_size: usize) -> Self { + fn new(table_byte_size: usize, strategy: Replacement) -> Self { let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); + let mask = if strategy == Replacement::TwoTier { (size - 1) & !1 } else { size - 1 }; let mut table = Vec::with_capacity(size); for _ in 0..size { table.push(Entry:: { @@ -58,10 +72,15 @@ impl TranspositionTable { value: 0, depth: 0, flag: EntryFlag::Exact, + generation: 0, best_move: None, }); } - Self { table: table, mask: size - 1, minimum_depth: 1 } + Self { table: table, mask: mask, generation: 0, strategy: strategy } + } + + fn advance_generation(&mut self) { + self.generation = self.generation.wrapping_add(1); } fn lookup(&self, hash: u64) -> Option<&Entry> { @@ -69,19 +88,48 @@ impl TranspositionTable { let entry = &self.table[index]; if hash == entry.hash { Some(entry) + } else if self.strategy == Replacement::TwoTier { + let entry = &self.table[index + 1]; + if hash == entry.hash { + Some(entry) + } else { + None + } } else { None } } fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { - if depth >= self.minimum_depth { - let index = (hash as usize) & self.mask; + let dest = match self.strategy { + Replacement::Always => Some((hash as usize) & self.mask), + Replacement::DepthPreferred => { + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if entry.generation != self.generation || entry.depth < depth { + Some(index) + } else { + None + } + } + Replacement::TwoTier => { + // index points to the first of a pair of entries, 
the depth-preferred entry and the always-replace entry. + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if entry.generation != self.generation || entry.depth < depth { + Some(index) + } else { + Some(index + 1) + } + } + }; + if let Some(index) = dest { self.table[index] = Entry { hash: hash, value: value, depth: depth, flag: flag, + generation: self.generation, best_move: Some(best_move), } } @@ -92,13 +140,12 @@ impl TranspositionTable { #[derive(Clone, Copy)] pub struct IterativeOptions { table_byte_size: usize, - // TODO: support more configuration of replacement strategy - // https://www.chessprogramming.org/Transposition_Table#Replacement_Strategies + strategy: Replacement, } impl IterativeOptions { pub fn new() -> Self { - IterativeOptions { table_byte_size: 1_000_000 } + IterativeOptions { table_byte_size: 1_000_000, strategy: Replacement::TwoTier } } } @@ -108,6 +155,11 @@ impl IterativeOptions { self.table_byte_size = size; self } + /// Approximately how large the transposition table should be in memory. + pub fn with_replacement_strategy(mut self, strategy: Replacement) -> Self { + self.strategy = strategy; + self + } } pub struct IterativeSearch { @@ -132,7 +184,7 @@ pub struct IterativeSearch { impl IterativeSearch { pub fn new(opts: IterativeOptions) -> IterativeSearch { - let table = TranspositionTable::new(opts.table_byte_size); + let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); IterativeSearch { max_depth: 100, max_time: Duration::from_secs(5), @@ -267,6 +319,7 @@ where ::M: Copy + Eq, { fn choose_move(&mut self, s: &::S) -> Option<::M> { + self.transposition_table.advance_generation(); // Reset stats. 
self.nodes_explored = 0; self.next_depth_nodes = 0; diff --git a/tests/strategies.rs b/tests/strategies.rs index 8744193..b8dd3f6 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -122,13 +122,25 @@ fn compare_plain_negamax() { let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); - let mut iterative = minimax::IterativeSearch::::new( - minimax::IterativeOptions::new().with_table_byte_size(64000), - ); - iterative.set_max_depth(max_depth); - iterative.choose_move(&b); - let iterative_value = iterative.root_value(); - assert_eq!(value, iterative_value, "search depth={}\n{}", max_depth, b); + for &strategy in &[ + minimax::Replacement::Always, + minimax::Replacement::DepthPreferred, + minimax::Replacement::TwoTier, + ] { + let mut iterative = minimax::IterativeSearch::::new( + minimax::IterativeOptions::new() + .with_table_byte_size(64000) + .with_replacement_strategy(strategy), + ); + iterative.set_max_depth(max_depth); + iterative.choose_move(&b); + let iterative_value = iterative.root_value(); + assert_eq!( + value, iterative_value, + "search depth={}, strategy={:?}\n{}", + max_depth, strategy, b + ); + } } } } From 2e649f3cc8ae50dab524581f7a7f36b077b28ff0 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 18 Feb 2021 00:38:28 -0800 Subject: [PATCH 023/162] Add null window search option. 
--- benches/negamax.rs | 4 +--- src/strategies/iterative.rs | 38 +++++++++++++++++++++++++++++++++---- tests/strategies.rs | 3 ++- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index e28fc8f..a0c4e8c 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -20,9 +20,7 @@ fn bench_iterative(b: &mut Bencher) { let board = connect4::Board::default(); b.iter(|| { let mut s = IterativeSearch::::new( - IterativeOptions::new() - .with_table_byte_size(32_000) - .with_replacement_strategy(ReplacementStrategy::TwoTier), + IterativeOptions::new().with_table_byte_size(32_000).with_null_window_search(true), ); s.set_max_depth(5); let m = s.choose_move(&board); diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index c4f3e3f..bd14fa4 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -141,11 +141,16 @@ impl TranspositionTable { pub struct IterativeOptions { table_byte_size: usize, strategy: Replacement, + null_window_search: bool, } impl IterativeOptions { pub fn new() -> Self { - IterativeOptions { table_byte_size: 1_000_000, strategy: Replacement::TwoTier } + IterativeOptions { + table_byte_size: 1_000_000, + strategy: Replacement::TwoTier, + null_window_search: false, + } } } @@ -155,11 +160,18 @@ impl IterativeOptions { self.table_byte_size = size; self } + /// Approximately how large the transposition table should be in memory. pub fn with_replacement_strategy(mut self, strategy: Replacement) -> Self { self.strategy = strategy; self } + + /// Whether to add null-window searches to try to prune branches without a full search. + pub fn with_null_window_search(mut self, null: bool) -> Self { + self.null_window_search = null; + self + } } pub struct IterativeSearch { @@ -170,6 +182,8 @@ pub struct IterativeSearch { prev_value: Evaluation, _eval: PhantomData, + opts: IterativeOptions, + // Runtime stats for the last move generated. // Maximum depth used to produce the move. 
@@ -191,6 +205,7 @@ impl IterativeSearch { timeout: Arc::new(AtomicBool::new(false)), transposition_table: table, prev_value: 0, + opts: opts, _eval: PhantomData, actual_depth: 0, nodes_explored: 0, @@ -286,15 +301,31 @@ impl IterativeSearch { let mut best = WORST_EVAL; let mut best_move = moves[0].unwrap(); + let mut null_window = false; for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { m.apply(s); - let value = -self.negamax(s, depth - 1, -beta, -alpha)?; + let value = if null_window { + let probe = -self.negamax(s, depth - 1, -alpha - 1, -alpha)?; + if probe > alpha && probe < beta { + // Full search fallback. + -self.negamax(s, depth - 1, -beta, -alpha)? + } else { + probe + } + } else { + -self.negamax(s, depth - 1, -beta, -alpha)? + }; m.undo(s); if value > best { best = value; best_move = m; } - alpha = max(alpha, value); + if value > alpha { + alpha = value; + // Now that we've found a good move, assume following moves + // are worse, and seek to cull them without full evaluation. + null_window = self.opts.null_window_search; + } if alpha >= beta { break; } @@ -308,7 +339,6 @@ impl IterativeSearch { EntryFlag::Exact }; self.transposition_table.store(hash, best, depth, flag, best_move); - Some(best) } } diff --git a/tests/strategies.rs b/tests/strategies.rs index b8dd3f6..c7f2e80 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -130,7 +130,8 @@ fn compare_plain_negamax() { let mut iterative = minimax::IterativeSearch::::new( minimax::IterativeOptions::new() .with_table_byte_size(64000) - .with_replacement_strategy(strategy), + .with_replacement_strategy(strategy) + .with_null_window_search(true), ); iterative.set_max_depth(max_depth); iterative.choose_move(&b); From aee2546434235e72c0c901378bd81d3889f82623 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 20 Feb 2021 11:28:40 -0800 Subject: [PATCH 024/162] Add option for incrementing 2 depths at a time. Disabled in benchmark as it doesn't help for connect4. 
--- src/strategies/iterative.rs | 12 +++++++++++- tests/strategies.rs | 28 +++++++++++++++++----------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index bd14fa4..d35c091 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -142,6 +142,7 @@ pub struct IterativeOptions { table_byte_size: usize, strategy: Replacement, null_window_search: bool, + step_increment: u8, } impl IterativeOptions { @@ -150,6 +151,7 @@ impl IterativeOptions { table_byte_size: 1_000_000, strategy: Replacement::TwoTier, null_window_search: false, + step_increment: 1, } } } @@ -172,6 +174,12 @@ impl IterativeOptions { self.null_window_search = null; self } + + /// Increment the depth by two between iterations. + pub fn with_double_step_increment(mut self) -> Self { + self.step_increment = 2; + self + } } pub struct IterativeSearch { @@ -367,7 +375,8 @@ where let mut s_clone = s.clone(); let mut best_move = None; - for depth in 0..=self.max_depth as u8 { + let mut depth = self.max_depth as u8 % self.opts.step_increment; + while depth <= self.max_depth as u8 { if self.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. break; @@ -379,6 +388,7 @@ where self.nodes_explored += self.next_depth_nodes; self.prev_value = entry.value; self.next_depth_nodes = 0; + depth += self.opts.step_increment; } self.wall_time = start_time.elapsed(); best_move diff --git a/tests/strategies.rs b/tests/strategies.rs index c7f2e80..2696f59 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -122,24 +122,30 @@ fn compare_plain_negamax() { let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); - for &strategy in &[ - minimax::Replacement::Always, - minimax::Replacement::DepthPreferred, - minimax::Replacement::TwoTier, - ] { + // Sampling of the configuration space. 
+ for (option_num, opt) in vec![ + minimax::IterativeOptions::new() + .with_replacement_strategy(minimax::Replacement::DepthPreferred) + .with_null_window_search(true), + minimax::IterativeOptions::new() + .with_replacement_strategy(minimax::Replacement::Always) + .with_double_step_increment(), + minimax::IterativeOptions::new() + .with_replacement_strategy(minimax::Replacement::TwoTier), + ] + .drain(..) + .enumerate() + { let mut iterative = minimax::IterativeSearch::::new( - minimax::IterativeOptions::new() - .with_table_byte_size(64000) - .with_replacement_strategy(strategy) - .with_null_window_search(true), + opt.with_table_byte_size(64000), ); iterative.set_max_depth(max_depth); iterative.choose_move(&b); let iterative_value = iterative.root_value(); assert_eq!( value, iterative_value, - "search depth={}, strategy={:?}\n{}", - max_depth, strategy, b + "search depth={}, option={}\n{}", + max_depth, option_num, b ); } } From 5a1553d67dc3eea5141ac542d88cbd53ac484918 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 21 Feb 2021 21:43:52 -0800 Subject: [PATCH 025/162] Implement optional quiescence search at the leaf nodes. Completely untested because I'm not sure what noisy moves would be in connect four. --- src/interface.rs | 14 ++++++++ src/strategies/iterative.rs | 67 +++++++++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index ae70888..a2a7b38 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -93,6 +93,20 @@ pub trait Game: Sized { /// maximum number of moves. fn generate_moves(&Self::S, &mut [Option]) -> usize; + /// Optional interface to support strategies using quiescence search. + /// + /// A "noisy" move is a threatening move that requires a response. + /// + /// The term comes from chess, where capturing a piece is considered a noisy + /// move. Capturing a piece is often the first move out of an exchange of + /// captures. 
Evaluating the board state after only the first capture can + /// give a misleadingly high score. The solution is to continue the search + /// among only noisy moves and find the score once the board state settles. + fn generate_noisy_moves(&Self::S, &mut [Option]) -> Option { + // Default implementation indicates unimplemented. + None + } + /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if /// the state is non-terminal. diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index d35c091..f1dbd85 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -143,6 +143,7 @@ pub struct IterativeOptions { strategy: Replacement, null_window_search: bool, step_increment: u8, + max_quiescence_depth: u8, } impl IterativeOptions { @@ -152,6 +153,7 @@ impl IterativeOptions { strategy: Replacement::TwoTier, null_window_search: false, step_increment: 1, + max_quiescence_depth: 0, } } } @@ -180,6 +182,11 @@ impl IterativeOptions { self.step_increment = 2; self } + + pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { + self.max_quiescence_depth = depth; + self + } } pub struct IterativeSearch { @@ -251,6 +258,57 @@ impl IterativeSearch { self.prev_value } + fn check_noisy_search_capability(&mut self, s: &::S) + where + ::M: Copy, + { + if self.opts.max_quiescence_depth > 0 { + let mut moves = [None; 200]; + if E::G::generate_noisy_moves(s, &mut moves).is_none() { + panic!("Quiescence search requested, but this game has not implemented generate_noisy_moves."); + } + } + } + + // Negamax only among noisy moves. 
+ fn noisy_negamax( + &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, + ) -> Option + where + ::M: Copy, + { + if self.timeout.load(Ordering::Relaxed) { + return None; + } + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + if depth == 0 { + return Some(E::evaluate(s)); + } + + let mut moves = [None; 200]; + // Depth is only allowed to be >0 if this game supports noisy moves. + let n = E::G::generate_noisy_moves(s, &mut moves).unwrap(); + if n == 0 { + // Only quiet moves remain, return leaf evaluation. + return Some(E::evaluate(s)); + } + + let mut best = WORST_EVAL; + for m in moves[..n].iter().map(|om| om.unwrap()) { + m.apply(s); + let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; + m.undo(s); + best = max(best, value); + alpha = max(alpha, value); + if alpha >= beta { + break; + } + } + Some(best) + } + // Recursively compute negamax on the game state. Returns None if it hits the timeout. fn negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, @@ -266,12 +324,14 @@ impl IterativeSearch { self.next_depth_nodes += 1; + if depth == 0 { + // Evaluate quiescence search on leaf nodes. + // Will just return the node's evaluation if quiescence search is disabled. + return self.noisy_negamax(s, self.opts.max_quiescence_depth, alpha, beta); + } if let Some(winner) = E::G::get_winner(s) { return Some(winner.evaluate()); } - if depth == 0 { - return Some(E::evaluate(s)); - } let alpha_orig = alpha; let hash = s.zobrist_hash(); @@ -357,6 +417,7 @@ where ::M: Copy + Eq, { fn choose_move(&mut self, s: &::S) -> Option<::M> { + self.check_noisy_search_capability(s); self.transposition_table.advance_generation(); // Reset stats. self.nodes_explored = 0; From c41434ceef5814c5e294625763c6829e5b6fd7cf Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 21 Feb 2021 22:17:02 -0800 Subject: [PATCH 026/162] Compute the principal variation at each iteration. 
This revealed that the principal variation often runs through cutoff nodes, which is concerning... --- src/strategies/iterative.rs | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index f1dbd85..b75a06f 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -34,7 +34,7 @@ pub enum Replacement { // TODO: Bucket(size) } -#[derive(Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] enum EntryFlag { Exact, Upperbound, @@ -208,6 +208,7 @@ pub struct IterativeSearch { // Nodes explored past this depth, and thus this is thrown away work. next_depth_nodes: usize, table_hits: usize, + pv: Vec<::M>, wall_time: Duration, } @@ -226,6 +227,7 @@ impl IterativeSearch { nodes_explored: 0, next_depth_nodes: 0, table_hits: 0, + pv: Vec::new(), wall_time: Duration::default(), } } @@ -258,6 +260,38 @@ impl IterativeSearch { self.prev_value } + // After finishing a search, populate the principal variation as deep as + // the table remembers it. + fn populate_pv(&mut self, s: &mut ::S) + where + ::S: Zobrist, + ::M: Copy, + { + self.pv.clear(); + let mut hash = s.zobrist_hash(); + while let Some(m) = + self.transposition_table.lookup(hash).map(|entry| entry.best_move).flatten() + { + // The principal variation should only have exact nodes, as other + // node types are from cutoffs where the node is proven to be + // worse than a previously explored one. + // TODO: debug_assert_eq!(entry.flag, EntryFlag::Exact); + self.pv.push(m); + m.apply(s); + hash = s.zobrist_hash(); + } + // Restore state. + for m in self.pv.iter().rev() { + m.undo(s); + } + } + + /// Return what the engine considered to be the best sequence of moves + /// from both sides. + pub fn principal_variation(&self) -> &[::M] { + &self.pv[..] 
+ } + fn check_noisy_search_capability(&mut self, s: &::S) where ::M: Copy, @@ -450,6 +484,7 @@ where self.prev_value = entry.value; self.next_depth_nodes = 0; depth += self.opts.step_increment; + self.populate_pv(&mut s_clone); } self.wall_time = start_time.elapsed(); best_move From e4f135d8c71ad62dad9b3df213cdbfb8080ef698 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 21 Feb 2021 22:59:01 -0800 Subject: [PATCH 027/162] Refactor table bookkeeping out of giant negamax function body. --- src/strategies/iterative.rs | 90 ++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index b75a06f..a848c4b 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -343,6 +343,54 @@ impl IterativeSearch { Some(best) } + // Check and update negamax state based on any transposition table hit. + #[inline] + fn table_check( + &mut self, hash: u64, depth: u8, good_move: &mut Option<::M>, + alpha: &mut Evaluation, beta: &mut Evaluation, + ) -> Option + where + ::M: Copy, + { + if let Some(entry) = self.transposition_table.lookup(hash) { + *good_move = entry.best_move; + self.table_hits += 1; + if entry.depth >= depth { + match entry.flag { + EntryFlag::Exact => { + return Some(entry.value); + } + EntryFlag::Lowerbound => { + *alpha = max(*alpha, entry.value); + } + EntryFlag::Upperbound => { + *beta = min(*beta, entry.value); + } + } + if *alpha >= *beta { + return Some(entry.value); + } + } + } + None + } + + // Update table based on negamax results. 
+ #[inline(always)] + fn table_update( + &mut self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, + best: Evaluation, best_move: ::M, + ) { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.transposition_table.store(hash, best, depth, flag, best_move); + } + // Recursively compute negamax on the game state. Returns None if it hits the timeout. fn negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, @@ -370,34 +418,19 @@ impl IterativeSearch { let alpha_orig = alpha; let hash = s.zobrist_hash(); let mut good_move = None; - if let Some(entry) = self.transposition_table.lookup(hash) { - good_move = entry.best_move; - self.table_hits += 1; - if entry.depth >= depth { - match entry.flag { - EntryFlag::Exact => { - return Some(entry.value); - } - EntryFlag::Lowerbound => { - alpha = max(alpha, entry.value); - } - EntryFlag::Upperbound => { - beta = min(beta, entry.value); - } - } - if alpha >= beta { - return Some(entry.value); - } - } + if let Some(value) = self.table_check(hash, depth, &mut good_move, &mut alpha, &mut beta) { + return Some(value); } let mut moves = [None; 200]; let n = E::G::generate_moves(s, &mut moves); - // Rearrange so predicted good move is first. - for i in 0..n { - if moves[i] == good_move { - moves.swap(0, i); - break; + if good_move.is_some() { + // Rearrange so predicted good move is first. 
+ for i in 0..n { + if moves[i] == good_move { + moves.swap(0, i); + break; + } } } @@ -433,14 +466,7 @@ impl IterativeSearch { } } - let flag = if best <= alpha_orig { - EntryFlag::Upperbound - } else if best >= beta { - EntryFlag::Lowerbound - } else { - EntryFlag::Exact - }; - self.transposition_table.store(hash, best, depth, flag, best_move); + self.table_update(hash, alpha_orig, beta, depth, best, best_move); Some(best) } } From 88192efea2ef55b8ce2fa1c2d3ce27f9479284df Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 23 Feb 2021 11:47:37 -0800 Subject: [PATCH 028/162] Port value clamping to IterativeSearch. --- src/strategies/iterative.rs | 5 +++-- src/strategies/mod.rs | 1 + src/strategies/negamax.rs | 25 +++---------------------- src/strategies/util.rs | 25 +++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 24 deletions(-) create mode 100644 src/strategies/util.rs diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index a848c4b..c7fe186 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -5,6 +5,7 @@ //! a transposition table to reuse information from previous iterations. 
use super::super::interface::*; +use super::util::*; use std::cmp::{max, min}; use std::marker::PhantomData; @@ -257,7 +258,7 @@ impl IterativeSearch { #[doc(hidden)] pub fn root_value(&self) -> Evaluation { - self.prev_value + unclamp_value(self.prev_value) } // After finishing a search, populate the principal variation as deep as @@ -467,7 +468,7 @@ impl IterativeSearch { } self.table_update(hash, alpha_orig, beta, depth, best, best_move); - Some(best) + Some(clamp_value(best)) } } diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 522dc39..acb7ee9 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -3,3 +3,4 @@ pub mod iterative; pub mod negamax; pub mod random; +mod util; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 2885d87..78cbaed 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -4,24 +4,12 @@ //! the "best" moves, so that it's non-deterministic. use super::super::interface::*; +use super::util::*; use rand; use rand::Rng; use std::cmp::max; use std::marker::PhantomData; -// For values near winning and losing values, push them slightly closer to zero. -// A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). -// A loss in 5 moves (WORST+5) will be chosen over a loss in 3 moves (WORST+3). -fn degrade_wins(value: Evaluation) -> Evaluation { - if value > BEST_EVAL - 100 { - value - 1 - } else if value < WORST_EVAL + 100 { - value + 1 - } else { - value - } -} - fn negamax( s: &mut ::S, depth: usize, mut alpha: Evaluation, beta: Evaluation, ) -> Evaluation @@ -47,7 +35,7 @@ where break; } } - degrade_wins(best) + clamp_value(best) } pub struct Negamax { @@ -64,14 +52,7 @@ impl Negamax { #[doc(hidden)] pub fn root_value(&self) -> Evaluation { - // Undo any value clamping. 
- if self.prev_value > BEST_EVAL - 100 { - BEST_EVAL - } else if self.prev_value < WORST_EVAL + 100 { - WORST_EVAL - } else { - self.prev_value - } + unclamp_value(self.prev_value) } } diff --git a/src/strategies/util.rs b/src/strategies/util.rs new file mode 100644 index 0000000..b21778b --- /dev/null +++ b/src/strategies/util.rs @@ -0,0 +1,25 @@ +use super::super::interface::*; + +// For values near winning and losing values, push them slightly closer to zero. +// A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). +// A loss in 5 moves (WORST+5) will be chosen over a loss in 3 moves (WORST+3). +pub(super) fn clamp_value(value: Evaluation) -> Evaluation { + if value > BEST_EVAL - 100 { + value - 1 + } else if value < WORST_EVAL + 100 { + value + 1 + } else { + value + } +} + +// Undo any value clamping. +pub(super) fn unclamp_value(value: Evaluation) -> Evaluation { + if value > BEST_EVAL - 100 { + BEST_EVAL + } else if value < WORST_EVAL + 100 { + WORST_EVAL + } else { + value + } +} From 379b1e206de8971930a3089b638f2f4b4942a682 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 23 Feb 2021 12:07:10 -0800 Subject: [PATCH 029/162] integration test: ensure strategies actually pick one of the best moves --- examples/connect4.rs | 2 +- tests/strategies.rs | 35 +++++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 80e5660..909db6b 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -85,7 +85,7 @@ impl Display for Board { } } -#[derive(Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Place { col: u8, } diff --git a/tests/strategies.rs b/tests/strategies.rs index 2696f59..6c250e5 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -36,15 +36,17 @@ where best } -pub struct PlainNegamax { +pub struct PlainNegamax { depth: usize, root_value: Evaluation, + // All moves tied with the best 
valuation. + best_moves: Vec<::M>, _eval: PhantomData, } impl PlainNegamax { pub fn new(depth: usize) -> PlainNegamax { - PlainNegamax { depth: depth, root_value: 0, _eval: PhantomData } + PlainNegamax { depth: depth, root_value: 0, best_moves: Vec::new(), _eval: PhantomData } } } @@ -57,20 +59,23 @@ where let mut moves = [None; 200]; let n = E::G::generate_moves(s, &mut moves); - let mut best_move = None; + self.best_moves.clear(); let mut best_value = WORST_EVAL; let mut s_clone = s.clone(); for m in moves[..n].iter().map(|m| m.unwrap()) { m.apply(&mut s_clone); let value = -negamax::(&mut s_clone, self.depth); m.undo(&mut s_clone); - if value > best_value { + if value == best_value { + self.best_moves.push(m); + } else if value > best_value { best_value = value; - best_move = Some(m); + self.best_moves.clear(); + self.best_moves.push(m); } } self.root_value = best_value; - best_move + self.best_moves.first().map(|m| *m) } } @@ -118,9 +123,16 @@ fn compare_plain_negamax() { let value = plain_negamax.root_value; let mut negamax = minimax::Negamax::::with_max_depth(max_depth); - negamax.choose_move(&b); + let negamax_move = negamax.choose_move(&b).unwrap(); let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); + assert!( + plain_negamax.best_moves.contains(&negamax_move), + "bad move={:?}\nsearch depth={}\n{}", + negamax_move, + max_depth, + b + ); // Sampling of the configuration space. 
for (option_num, opt) in vec![ @@ -140,13 +152,20 @@ fn compare_plain_negamax() { opt.with_table_byte_size(64000), ); iterative.set_max_depth(max_depth); - iterative.choose_move(&b); + let iterative_move = iterative.choose_move(&b).unwrap(); let iterative_value = iterative.root_value(); assert_eq!( value, iterative_value, "search depth={}, option={}\n{}", max_depth, option_num, b ); + assert!( + plain_negamax.best_moves.contains(&iterative_move), + "bad move={:?}\nsearch depth={}\n{}", + iterative_move, + max_depth, + b + ); } } } From 27adf8d948aac0f6a1173e43bcdf0f9622352fb0 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 23 Feb 2021 21:10:06 -0800 Subject: [PATCH 030/162] Narrow window after failed scout probe. --- src/strategies/iterative.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index c7fe186..fa32bd6 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -270,13 +270,12 @@ impl IterativeSearch { { self.pv.clear(); let mut hash = s.zobrist_hash(); - while let Some(m) = - self.transposition_table.lookup(hash).map(|entry| entry.best_move).flatten() - { + while let Some(entry) = self.transposition_table.lookup(hash) { // The principal variation should only have exact nodes, as other // node types are from cutoffs where the node is proven to be // worse than a previously explored one. // TODO: debug_assert_eq!(entry.flag, EntryFlag::Exact); + let m = entry.best_move.unwrap(); self.pv.push(m); m.apply(s); hash = s.zobrist_hash(); @@ -444,7 +443,7 @@ impl IterativeSearch { let probe = -self.negamax(s, depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { // Full search fallback. - -self.negamax(s, depth - 1, -beta, -alpha)? + -self.negamax(s, depth - 1, -beta, -probe)? 
} else { probe } From 651cd62d188272753adacf241d944212128ff61a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 23 Feb 2021 21:56:39 -0800 Subject: [PATCH 031/162] Resolve the mystery of the non-exact PV nodes. --- src/strategies/iterative.rs | 10 +++++++--- tests/strategies.rs | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index fa32bd6..7661810 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -107,7 +107,7 @@ impl TranspositionTable { Replacement::DepthPreferred => { let index = (hash as usize) & self.mask; let entry = &self.table[index]; - if entry.generation != self.generation || entry.depth < depth { + if entry.generation != self.generation || entry.depth <= depth { Some(index) } else { None @@ -117,7 +117,7 @@ impl TranspositionTable { // index points to the first of a pair of entries, the depth-preferred entry and the always-replace entry. let index = (hash as usize) & self.mask; let entry = &self.table[index]; - if entry.generation != self.generation || entry.depth < depth { + if entry.generation != self.generation || entry.depth <= depth { Some(index) } else { Some(index + 1) @@ -274,7 +274,11 @@ impl IterativeSearch { // The principal variation should only have exact nodes, as other // node types are from cutoffs where the node is proven to be // worse than a previously explored one. - // TODO: debug_assert_eq!(entry.flag, EntryFlag::Exact); + // + // Sometimes, it takes multiple rounds of narrowing bounds for the + // value to be exact, and we can't guarantee that the table entry + // will remain in the table between the searches that find + // equivalent upper and lower bounds. 
let m = entry.best_move.unwrap(); self.pv.push(m); m.apply(s); diff --git a/tests/strategies.rs b/tests/strategies.rs index 6c250e5..e792192 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -114,7 +114,7 @@ fn generate_random_state(depth: usize) -> connect4::Board { #[test] fn compare_plain_negamax() { - for _ in 0..10 { + for _ in 0..100 { for max_depth in 0..5 { let b = generate_random_state(10); From 73086cfc0dcb137de1a0e923f238ca4c002b8a47 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 24 Feb 2021 11:28:51 -0800 Subject: [PATCH 032/162] Prevent infinitely long principal variations. --- src/strategies/iterative.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 7661810..9c78e8b 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -263,7 +263,7 @@ impl IterativeSearch { // After finishing a search, populate the principal variation as deep as // the table remembers it. - fn populate_pv(&mut self, s: &mut ::S) + fn populate_pv(&mut self, s: &mut ::S, mut depth: u8) where ::S: Zobrist, ::M: Copy, @@ -283,6 +283,11 @@ impl IterativeSearch { self.pv.push(m); m.apply(s); hash = s.zobrist_hash(); + // Prevent cyclical PVs from being infinitely long. + if depth == 0 { + break; + } + depth -= 1; } // Restore state. for m in self.pv.iter().rev() { @@ -514,7 +519,7 @@ where self.prev_value = entry.value; self.next_depth_nodes = 0; depth += self.opts.step_increment; - self.populate_pv(&mut s_clone); + self.populate_pv(&mut s_clone, depth + 1); } self.wall_time = start_time.elapsed(); best_move From e9adb07df995bd65904d92e37bfca4a2cb784675 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 24 Feb 2021 14:45:53 -0800 Subject: [PATCH 033/162] Update new option documentation. Also default null window search to on. 
--- src/strategies/iterative.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 9c78e8b..25e3752 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -152,7 +152,7 @@ impl IterativeOptions { IterativeOptions { table_byte_size: 1_000_000, strategy: Replacement::TwoTier, - null_window_search: false, + null_window_search: true, step_increment: 1, max_quiescence_depth: 0, } @@ -166,13 +166,16 @@ impl IterativeOptions { self } - /// Approximately how large the transposition table should be in memory. + /// What rules to use when choosing whether to overwrite the current value + /// in the transposition table. pub fn with_replacement_strategy(mut self, strategy: Replacement) -> Self { self.strategy = strategy; self } - /// Whether to add null-window searches to try to prune branches without a full search. + /// Whether to add null-window searches to try to prune branches that are + /// probably worse than those already found. Also known as principal + /// variation search. pub fn with_null_window_search(mut self, null: bool) -> Self { self.null_window_search = null; self @@ -184,6 +187,10 @@ impl IterativeOptions { self } + /// Enable [quiescence + /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves + /// of the search tree. The Game must implement `generate_noisy_moves` + /// for the search to know when the state has become "quiet". 
pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { self.max_quiescence_depth = depth; self From 01265c6b2df134b5712878430bbc6ed9d1103232 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 24 Feb 2021 14:47:34 -0800 Subject: [PATCH 034/162] Update version to 0.1.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 44e228d..13ce9e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.1.0" +version = "0.1.1" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" From c626e29d8c2a7d8b954293c452189d107717c80c Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 24 Feb 2021 16:07:49 -0800 Subject: [PATCH 035/162] Pass a Vec in the generate_moves API. This simplifies dealing with moves on both sides of the API and prevents out of bounds for games with a high branching factor. Backwards incompatible, so bumping the version to 0.2.0. The original reason for using a stack-allocated array of optional moves was to prevent allocations in the inner loop. However, it turns out to be easy to just cache all the Vecs and their backing storage, so the perf actually improves (around 1.1x), as you don't have to repeatedly set large chunks of the stack to the non-zero Option. 
--- Cargo.toml | 2 +- examples/connect4.rs | 8 +--- examples/ttt.rs | 8 +--- src/interface.rs | 19 +++----- src/strategies/iterative.rs | 45 +++++++++++++------ src/strategies/negamax.rs | 90 ++++++++++++++++++++++--------------- src/strategies/random.rs | 10 +++-- tests/strategies.rs | 18 ++++---- 8 files changed, 112 insertions(+), 88 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 13ce9e2..301bd95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.1.1" +version = "0.2.0" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" diff --git a/examples/connect4.rs b/examples/connect4.rs index 909db6b..2548a37 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -125,18 +125,14 @@ impl minimax::Game for Game { type S = Board; type M = Place; - fn generate_moves(b: &Board, moves: &mut [Option]) -> usize { - let mut n = 0; + fn generate_moves(b: &Board, moves: &mut Vec) { let mut cols = b.all_pieces; for i in 0..NUM_COLS { if cols & COL_MASK < COL_MASK { - moves[n] = Some(Place { col: i as u8 }); - n += 1; + moves.push(Place { col: i as u8 }); } cols >>= HEIGHT; } - moves[n] = None; - n } fn get_winner(b: &Board) -> Option { diff --git a/examples/ttt.rs b/examples/ttt.rs index 564aef8..565ff0c 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -81,16 +81,12 @@ impl minimax::Game for Game { type S = Board; type M = Place; - fn generate_moves(b: &Board, ms: &mut [Option]) -> usize { - let mut j = 0; + fn generate_moves(b: &Board, ms: &mut Vec) { for i in 0..b.squares.len() { if b.squares[i] == Square::Empty { - ms[j] = Some(Place { i: i as u8 }); - j += 1; + ms.push(Place { i: i as u8 }); } } - ms[j] = None; - j } fn get_winner(b: &Board) -> Option { diff --git a/src/interface.rs b/src/interface.rs index a2a7b38..0d66872 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -82,18 +82,11 @@ pub trait Game: 
Sized { /// The type of game moves. type M: Move; - /// Generate moves at the given state. After finishing, the next entry in - /// the slice should be set to `None` to indicate the end. Returns the - /// number of moves generated. - /// - /// Currently, there's a deficiency that all strategies assume that at most - /// 200 moves may be generated for any position, which allows the underlying - /// memory for the slice to be a stack-allocated array. Once stable, this - /// trait will be extended with an associated constant to specify the - /// maximum number of moves. - fn generate_moves(&Self::S, &mut [Option]) -> usize; + /// Generate moves at the given state. + fn generate_moves(&Self::S, &mut Vec); - /// Optional interface to support strategies using quiescence search. + /// Optional interface to support strategies using quiescence + /// search. Return true when implemented. /// /// A "noisy" move is a threatening move that requires a response. /// @@ -102,9 +95,9 @@ pub trait Game: Sized { /// captures. Evaluating the board state after only the first capture can /// give a misleadingly high score. The solution is to continue the search /// among only noisy moves and find the score once the board state settles. - fn generate_noisy_moves(&Self::S, &mut [Option]) -> Option { + fn generate_noisy_moves(&Self::S, &mut Vec) -> bool { // Default implementation indicates unimplemented. 
- None + false } /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 25e3752..2fff755 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -202,6 +202,7 @@ pub struct IterativeSearch { max_time: Duration, timeout: Arc, transposition_table: TranspositionTable<<::G as Game>::M>, + move_list_pool: Vec::M>>, prev_value: Evaluation, _eval: PhantomData, @@ -228,6 +229,7 @@ impl IterativeSearch { max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), transposition_table: table, + move_list_pool: Vec::new(), prev_value: 0, opts: opts, _eval: PhantomData, @@ -308,15 +310,25 @@ impl IterativeSearch { &self.pv[..] } + fn new_move_list(&mut self) -> Vec<::M> { + self.move_list_pool.pop().unwrap_or_else(|| Vec::new()) + } + + fn free_move_list(&mut self, mut move_list: Vec<::M>) { + move_list.clear(); + self.move_list_pool.push(move_list); + } + fn check_noisy_search_capability(&mut self, s: &::S) where ::M: Copy, { if self.opts.max_quiescence_depth > 0 { - let mut moves = [None; 200]; - if E::G::generate_noisy_moves(s, &mut moves).is_none() { + let mut moves = self.new_move_list(); + if !E::G::generate_noisy_moves(s, &mut moves) { panic!("Quiescence search requested, but this game has not implemented generate_noisy_moves."); } + self.free_move_list(moves); } } @@ -337,16 +349,17 @@ impl IterativeSearch { return Some(E::evaluate(s)); } - let mut moves = [None; 200]; + let mut moves = self.new_move_list(); // Depth is only allowed to be >0 if this game supports noisy moves. - let n = E::G::generate_noisy_moves(s, &mut moves).unwrap(); - if n == 0 { + E::G::generate_noisy_moves(s, &mut moves); + if moves.is_empty() { // Only quiet moves remain, return leaf evaluation. 
+ self.free_move_list(moves); return Some(E::evaluate(s)); } let mut best = WORST_EVAL; - for m in moves[..n].iter().map(|om| om.unwrap()) { + for m in moves.iter() { m.apply(s); let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; m.undo(s); @@ -356,6 +369,7 @@ impl IterativeSearch { break; } } + self.free_move_list(moves); Some(best) } @@ -438,12 +452,16 @@ impl IterativeSearch { return Some(value); } - let mut moves = [None; 200]; - let n = E::G::generate_moves(s, &mut moves); - if good_move.is_some() { + let mut moves = self.new_move_list(); + E::G::generate_moves(s, &mut moves); + if moves.is_empty() { + self.free_move_list(moves); + return Some(WORST_EVAL); + } + if let Some(good) = good_move { // Rearrange so predicted good move is first. - for i in 0..n { - if moves[i] == good_move { + for i in 0..moves.len() { + if moves[i] == good { moves.swap(0, i); break; } @@ -451,9 +469,9 @@ impl IterativeSearch { } let mut best = WORST_EVAL; - let mut best_move = moves[0].unwrap(); + let mut best_move = moves[0]; let mut null_window = false; - for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { + for &m in moves.iter() { m.apply(s); let value = if null_window { let probe = -self.negamax(s, depth - 1, -alpha - 1, -alpha)?; @@ -483,6 +501,7 @@ impl IterativeSearch { } self.table_update(hash, alpha_orig, beta, depth, best, best_move); + self.free_move_list(moves); Some(clamp_value(best)) } } diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 78cbaed..f1a97e2 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -10,36 +10,9 @@ use rand::Rng; use std::cmp::max; use std::marker::PhantomData; -fn negamax( - s: &mut ::S, depth: usize, mut alpha: Evaluation, beta: Evaluation, -) -> Evaluation -where - <::G as Game>::M: Copy, -{ - if let Some(winner) = E::G::get_winner(s) { - return winner.evaluate(); - } - if depth == 0 { - return E::evaluate(s); - } - let mut moves = [None; 200]; - 
E::G::generate_moves(s, &mut moves); - let mut best = WORST_EVAL; - for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { - m.apply(s); - let value = -negamax::(s, depth - 1, -beta, -alpha); - m.undo(s); - best = max(best, value); - alpha = max(alpha, value); - if alpha >= beta { - break; - } - } - clamp_value(best) -} - -pub struct Negamax { +pub struct Negamax { max_depth: usize, + move_list_pool: Vec::M>>, rng: rand::ThreadRng, prev_value: Evaluation, _eval: PhantomData, @@ -47,13 +20,57 @@ pub struct Negamax { impl Negamax { pub fn with_max_depth(depth: usize) -> Negamax { - Negamax { max_depth: depth, rng: rand::thread_rng(), prev_value: 0, _eval: PhantomData } + Negamax { + max_depth: depth, + move_list_pool: Vec::new(), + rng: rand::thread_rng(), + prev_value: 0, + _eval: PhantomData, + } } #[doc(hidden)] pub fn root_value(&self) -> Evaluation { unclamp_value(self.prev_value) } + + fn new_move_list(&mut self) -> Vec<::M> { + self.move_list_pool.pop().unwrap_or_else(|| Vec::new()) + } + + fn free_move_list(&mut self, mut move_list: Vec<::M>) { + move_list.clear(); + self.move_list_pool.push(move_list); + } + + fn negamax( + &mut self, s: &mut ::S, depth: usize, mut alpha: Evaluation, beta: Evaluation, + ) -> Evaluation + where + <::G as Game>::M: Copy, + { + if let Some(winner) = E::G::get_winner(s) { + return winner.evaluate(); + } + if depth == 0 { + return E::evaluate(s); + } + let mut moves = self.new_move_list(); + E::G::generate_moves(s, &mut moves); + let mut best = WORST_EVAL; + for m in moves.iter() { + m.apply(s); + let value = -self.negamax(s, depth - 1, -beta, -alpha); + m.undo(s); + best = max(best, value); + alpha = max(alpha, value); + if alpha >= beta { + break; + } + } + self.free_move_list(moves); + clamp_value(best) + } } impl Strategy for Negamax @@ -63,18 +80,18 @@ where { fn choose_move(&mut self, s: &::S) -> Option<::M> { let mut best = WORST_EVAL; - let mut moves = [None; 200]; - let n = E::G::generate_moves(s, 
&mut moves); + let mut moves = self.new_move_list(); + E::G::generate_moves(s, &mut moves); // Randomly permute order that we look at the moves. // We'll pick the first best score from this list. - self.rng.shuffle(&mut moves[..n]); + self.rng.shuffle(&mut moves[..]); - let mut best_move = moves[0]?; + let mut best_move = *moves.first()?; let mut s_clone = s.clone(); - for m in moves.iter().take_while(|m| m.is_some()).map(|m| m.unwrap()) { + for &m in moves.iter() { // determine value for this move m.apply(&mut s_clone); - let value = -negamax::(&mut s_clone, self.max_depth, WORST_EVAL, -best); + let value = -self.negamax(&mut s_clone, self.max_depth, WORST_EVAL, -best); m.undo(&mut s_clone); // Strictly better than any move found so far. if value > best { @@ -82,6 +99,7 @@ where best_move = m; } } + self.free_move_list(moves); self.prev_value = best; Some(best_move) } diff --git a/src/strategies/random.rs b/src/strategies/random.rs index 7e34b7c..7c71285 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -19,10 +19,12 @@ where G::M: Copy, { fn choose_move(&mut self, s: &G::S) -> Option { - let mut moves: [Option; 200] = [None; 200]; - match G::generate_moves(s, &mut moves) { - 0 => None, - num_moves => Some(moves[self.rng.gen_range(0, num_moves)].unwrap()), + let mut moves = Vec::new(); + G::generate_moves(s, &mut moves); + if moves.is_empty() { + None + } else { + Some(moves[self.rng.gen_range(0, moves.len())]) } } } diff --git a/tests/strategies.rs b/tests/strategies.rs index e792192..682cdc8 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -24,10 +24,10 @@ where if depth == 0 { return E::evaluate(s); } - let mut moves = [None; 200]; - let n = E::G::generate_moves(s, &mut moves); + let mut moves = Vec::new(); + E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; - for m in moves[..n].iter().map(|m| m.unwrap()) { + for m in moves.iter() { m.apply(s); let value = -negamax::(s, depth - 1); m.undo(s); @@ -56,13 +56,13 @@ 
where ::M: Copy, { fn choose_move(&mut self, s: &::S) -> Option<::M> { - let mut moves = [None; 200]; - let n = E::G::generate_moves(s, &mut moves); + let mut moves = Vec::new(); + E::G::generate_moves(s, &mut moves); self.best_moves.clear(); let mut best_value = WORST_EVAL; let mut s_clone = s.clone(); - for m in moves[..n].iter().map(|m| m.unwrap()) { + for &m in moves.iter() { m.apply(&mut s_clone); let value = -negamax::(&mut s_clone, self.depth); m.undo(&mut s_clone); @@ -100,9 +100,9 @@ fn generate_random_state(depth: usize) -> connect4::Board { let mut rng = rand::thread_rng(); let mut b = connect4::Board::default(); for _ in 0..depth { - let mut moves = [None; 10]; - let n = connect4::Game::generate_moves(&b, &mut moves); - let m = moves[rng.gen_range(0, n)].unwrap(); + let mut moves = Vec::new(); + connect4::Game::generate_moves(&b, &mut moves); + let m = moves[rng.gen_range(0, moves.len())]; m.apply(&mut b); if connect4::Game::get_winner(&b).is_some() { // Oops, undo and try again on the next iter. From f2f4a2f3dc39bf3a3faf6cfeb026897e8aeb24d9 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 24 Feb 2021 22:28:23 -0800 Subject: [PATCH 036/162] Add a generic perft function for benchmarking move generation. 
--- examples/connect4.rs | 25 +++++---------- src/lib.rs | 1 + src/strategies/iterative.rs | 30 +++++++---------- src/strategies/negamax.rs | 22 ++++--------- src/util.rs | 64 ++++++++++++++++++++++++++++++++++++- 5 files changed, 90 insertions(+), 52 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 2548a37..f11b7bd 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -5,7 +5,7 @@ extern crate minimax; use std::default::Default; use std::fmt::{Display, Formatter, Result}; -use std::time::{Duration, Instant}; +use std::time::Duration; #[derive(Clone)] pub struct Board { @@ -244,26 +244,17 @@ impl minimax::Evaluator for BasicEvaluator { } } -fn game_benchmark(strategy: &mut dyn minimax::Strategy) -> Duration { - // To not depend on deterministic moves, maybe trace the moves of a fixed game. - use minimax::{Game, Move}; - - let start = Instant::now(); - let mut b = Board::default(); - while self::Game::get_winner(&b).is_none() { - match strategy.choose_move(&mut b) { - Some(m) => m.apply(&mut b), - None => break, - } - } - start.elapsed() -} - fn main() { - use minimax::{Game, Move, Strategy}; + use minimax::{perft, Game, Move, Strategy}; use minimax::{IterativeOptions, IterativeSearch, Negamax}; let mut b = Board::default(); + + if std::env::args().any(|arg| arg == "perft") { + perft::(&mut b, 10); + return; + } + let opts = IterativeOptions::new() .with_table_byte_size(1_000_000) .with_replacement_strategy(minimax::Replacement::DepthPreferred); diff --git a/src/lib.rs b/src/lib.rs index ba991dd..dd19105 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,3 +7,4 @@ pub mod util; pub use interface::{Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist}; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; pub use strategies::negamax::Negamax; +pub use util::perft; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 2fff755..6f2c2f7 100644 --- a/src/strategies/iterative.rs +++ 
b/src/strategies/iterative.rs @@ -5,6 +5,7 @@ //! a transposition table to reuse information from previous iterations. use super::super::interface::*; +use super::super::util::*; use super::util::*; use std::cmp::{max, min}; @@ -202,7 +203,7 @@ pub struct IterativeSearch { max_time: Duration, timeout: Arc, transposition_table: TranspositionTable<<::G as Game>::M>, - move_list_pool: Vec::M>>, + move_pool: MovePool<::M>, prev_value: Evaluation, _eval: PhantomData, @@ -229,7 +230,7 @@ impl IterativeSearch { max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), transposition_table: table, - move_list_pool: Vec::new(), + move_pool: MovePool::<_>::default(), prev_value: 0, opts: opts, _eval: PhantomData, @@ -310,25 +311,16 @@ impl IterativeSearch { &self.pv[..] } - fn new_move_list(&mut self) -> Vec<::M> { - self.move_list_pool.pop().unwrap_or_else(|| Vec::new()) - } - - fn free_move_list(&mut self, mut move_list: Vec<::M>) { - move_list.clear(); - self.move_list_pool.push(move_list); - } - fn check_noisy_search_capability(&mut self, s: &::S) where ::M: Copy, { if self.opts.max_quiescence_depth > 0 { - let mut moves = self.new_move_list(); + let mut moves = self.move_pool.new(); if !E::G::generate_noisy_moves(s, &mut moves) { panic!("Quiescence search requested, but this game has not implemented generate_noisy_moves."); } - self.free_move_list(moves); + self.move_pool.free(moves); } } @@ -349,12 +341,12 @@ impl IterativeSearch { return Some(E::evaluate(s)); } - let mut moves = self.new_move_list(); + let mut moves = self.move_pool.new(); // Depth is only allowed to be >0 if this game supports noisy moves. E::G::generate_noisy_moves(s, &mut moves); if moves.is_empty() { // Only quiet moves remain, return leaf evaluation. 
- self.free_move_list(moves); + self.move_pool.free(moves); return Some(E::evaluate(s)); } @@ -369,7 +361,7 @@ impl IterativeSearch { break; } } - self.free_move_list(moves); + self.move_pool.free(moves); Some(best) } @@ -452,10 +444,10 @@ impl IterativeSearch { return Some(value); } - let mut moves = self.new_move_list(); + let mut moves = self.move_pool.new(); E::G::generate_moves(s, &mut moves); if moves.is_empty() { - self.free_move_list(moves); + self.move_pool.free(moves); return Some(WORST_EVAL); } if let Some(good) = good_move { @@ -501,7 +493,7 @@ impl IterativeSearch { } self.table_update(hash, alpha_orig, beta, depth, best, best_move); - self.free_move_list(moves); + self.move_pool.free(moves); Some(clamp_value(best)) } } diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index f1a97e2..7ca7252 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -4,6 +4,7 @@ //! the "best" moves, so that it's non-deterministic. use super::super::interface::*; +use super::super::util::*; use super::util::*; use rand; use rand::Rng; @@ -12,7 +13,7 @@ use std::marker::PhantomData; pub struct Negamax { max_depth: usize, - move_list_pool: Vec::M>>, + move_pool: MovePool<::M>, rng: rand::ThreadRng, prev_value: Evaluation, _eval: PhantomData, @@ -22,7 +23,7 @@ impl Negamax { pub fn with_max_depth(depth: usize) -> Negamax { Negamax { max_depth: depth, - move_list_pool: Vec::new(), + move_pool: MovePool::<_>::default(), rng: rand::thread_rng(), prev_value: 0, _eval: PhantomData, @@ -34,15 +35,6 @@ impl Negamax { unclamp_value(self.prev_value) } - fn new_move_list(&mut self) -> Vec<::M> { - self.move_list_pool.pop().unwrap_or_else(|| Vec::new()) - } - - fn free_move_list(&mut self, mut move_list: Vec<::M>) { - move_list.clear(); - self.move_list_pool.push(move_list); - } - fn negamax( &mut self, s: &mut ::S, depth: usize, mut alpha: Evaluation, beta: Evaluation, ) -> Evaluation @@ -55,7 +47,7 @@ impl Negamax { if depth == 0 { return 
E::evaluate(s); } - let mut moves = self.new_move_list(); + let mut moves = self.move_pool.new(); E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for m in moves.iter() { @@ -68,7 +60,7 @@ impl Negamax { break; } } - self.free_move_list(moves); + self.move_pool.free(moves); clamp_value(best) } } @@ -80,7 +72,7 @@ where { fn choose_move(&mut self, s: &::S) -> Option<::M> { let mut best = WORST_EVAL; - let mut moves = self.new_move_list(); + let mut moves = self.move_pool.new(); E::G::generate_moves(s, &mut moves); // Randomly permute order that we look at the moves. // We'll pick the first best score from this list. @@ -99,7 +91,7 @@ where best_move = m; } } - self.free_move_list(moves); + self.move_pool.free(moves); self.prev_value = best; Some(best_move) } diff --git a/src/util.rs b/src/util.rs index ed8288d..7fa93ac 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,8 +1,9 @@ //! Utility functions for testing, and tests. use super::interface; -use super::interface::Move; +use super::interface::{Game, Move}; use std::default::Default; +use std::time::Instant; /// Play a complete, new game with players using the two provided strategies. /// @@ -32,3 +33,64 @@ where interface::Winner::PlayerToMove => Some(s), } } + +pub(crate) struct MovePool { + pool: Vec>, +} + +impl Default for MovePool { + fn default() -> Self { + Self { pool: Vec::new() } + } +} + +impl MovePool { + pub(crate) fn new(&mut self) -> Vec { + self.pool.pop().unwrap_or_else(|| Vec::new()) + } + + pub(crate) fn free(&mut self, mut vec: Vec) { + vec.clear(); + self.pool.push(vec); + } +} + +fn perft_recurse(pool: &mut MovePool, state: &mut G::S, depth: usize) -> u64 { + if depth == 0 { + return 1; + } + if G::get_winner(state).is_some() { + // Apparently perft rules only count positions at the target depth. 
+ return 0; + } + let mut moves = pool.new(); + G::generate_moves(state, &mut moves); + let n = if depth == 1 { + moves.len() as u64 + } else { + let mut count = 0; + for m in moves.iter() { + m.apply(state); + count += perft_recurse::(pool, state, depth - 1); + m.undo(state); + } + count + }; + pool.free(moves); + n +} + +pub fn perft(state: &mut ::S, max_depth: usize) -> Vec { + println!("depth count time kn/s"); + let mut pool = MovePool::::default(); + let mut counts = Vec::new(); + for depth in 0..max_depth+1 { + let start = Instant::now(); + let count = perft_recurse::(&mut pool, state, depth); + let dur = start.elapsed(); + let rate = count as f64 / dur.as_secs_f64() / 1000.0; + println!("{:>5} {:>15} {:>11} {:>11.1}", depth, count, format!("{:.1?}", dur), rate); + counts.push(count); + } + counts +} From c12e30358076bee86535f8048e2a5332f82af582 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 25 Feb 2021 08:53:55 -0800 Subject: [PATCH 037/162] Add mean and effective branching factors to stats. --- src/strategies/iterative.rs | 36 ++++++++++++++++++++++++++---------- src/util.rs | 2 +- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 6f2c2f7..4e69c9f 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -213,10 +213,14 @@ pub struct IterativeSearch { // Maximum depth used to produce the move. actual_depth: u8, - // Nodes explored up to this depth. - nodes_explored: usize, - // Nodes explored past this depth, and thus this is thrown away work. - next_depth_nodes: usize, + // Nodes explored at each depth. + nodes_explored: Vec, + // Nodes explored past this depth, and thus only useful for filling TT for + // next choose_move. + next_depth_nodes: u64, + // For computing the average branching factor. 
+ total_generate_move_calls: u64, + total_generated_moves: u64, table_hits: usize, pv: Vec<::M>, wall_time: Duration, @@ -235,8 +239,10 @@ impl IterativeSearch { opts: opts, _eval: PhantomData, actual_depth: 0, - nodes_explored: 0, + nodes_explored: Vec::new(), next_depth_nodes: 0, + total_generate_move_calls: 0, + total_generated_moves: 0, table_hits: 0, pv: Vec::new(), wall_time: Duration::default(), @@ -260,10 +266,16 @@ impl IterativeSearch { /// Return a human-readable summary of the last move generation. pub fn stats(&self) -> String { + let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); + let mean_branching_factor = + self.total_generated_moves as f64 / self.total_generate_move_calls as f64; + let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) + .powf((self.actual_depth as f64 + 1.0).recip()); let throughput = - (self.nodes_explored + self.next_depth_nodes) as f64 / self.wall_time.as_secs_f64(); - format!("Explored {} nodes to depth {}.\nInterrupted exploration of next depth explored {} nodes.\n{} transposition table hits.\n{} nodes/sec", - self.nodes_explored, self.actual_depth, self.next_depth_nodes, self.table_hits, throughput as usize) + (total_nodes_explored + self.next_depth_nodes) as f64 / self.wall_time.as_secs_f64(); + format!("Explored {} nodes to depth {}. 
MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} transposition table hits.\n{} nodes/sec", + total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, + self.next_depth_nodes, self.table_hits, throughput as usize) } #[doc(hidden)] @@ -446,6 +458,8 @@ impl IterativeSearch { let mut moves = self.move_pool.new(); E::G::generate_moves(s, &mut moves); + self.total_generate_move_calls += 1; + self.total_generated_moves += moves.len() as u64; if moves.is_empty() { self.move_pool.free(moves); return Some(WORST_EVAL); @@ -507,8 +521,10 @@ where self.check_noisy_search_capability(s); self.transposition_table.advance_generation(); // Reset stats. - self.nodes_explored = 0; + self.nodes_explored.clear(); self.next_depth_nodes = 0; + self.total_generate_move_calls = 0; + self.total_generated_moves = 0; self.actual_depth = 0; self.table_hits = 0; let start_time = Instant::now(); @@ -533,7 +549,7 @@ where best_move = entry.best_move; self.actual_depth = max(self.actual_depth, depth); - self.nodes_explored += self.next_depth_nodes; + self.nodes_explored.push(self.next_depth_nodes); self.prev_value = entry.value; self.next_depth_nodes = 0; depth += self.opts.step_increment; diff --git a/src/util.rs b/src/util.rs index 7fa93ac..1871497 100644 --- a/src/util.rs +++ b/src/util.rs @@ -84,7 +84,7 @@ pub fn perft(state: &mut ::S, max_depth: usize) -> Vec println!("depth count time kn/s"); let mut pool = MovePool::::default(); let mut counts = Vec::new(); - for depth in 0..max_depth+1 { + for depth in 0..max_depth + 1 { let start = Instant::now(); let count = perft_recurse::(&mut pool, state, depth); let dur = start.elapsed(); From 9878128db39536fcdc5a978fe248cf9f95604617 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 2 Mar 2021 12:13:08 -0800 Subject: [PATCH 038/162] Fix clippy lints. 
--- src/strategies/iterative.rs | 24 +++++++++++++++--------- src/strategies/negamax.rs | 4 ++-- src/strategies/random.rs | 6 ++++++ src/util.rs | 10 +++++----- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 4e69c9f..90c5f32 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -78,7 +78,7 @@ impl TranspositionTable { best_move: None, }); } - Self { table: table, mask: mask, generation: 0, strategy: strategy } + Self { table, mask, generation: 0, strategy } } fn advance_generation(&mut self) { @@ -127,10 +127,10 @@ impl TranspositionTable { }; if let Some(index) = dest { self.table[index] = Entry { - hash: hash, - value: value, - depth: depth, - flag: flag, + hash, + value, + depth, + flag, generation: self.generation, best_move: Some(best_move), } @@ -160,6 +160,12 @@ impl IterativeOptions { } } +impl Default for IterativeOptions { + fn default() -> Self { + Self::new() + } +} + impl IterativeOptions { /// Approximately how large the transposition table should be in memory. pub fn with_table_byte_size(mut self, size: usize) -> Self { @@ -236,7 +242,7 @@ impl IterativeSearch { transposition_table: table, move_pool: MovePool::<_>::default(), prev_value: 0, - opts: opts, + opts, _eval: PhantomData, actual_depth: 0, nodes_explored: Vec::new(), @@ -328,7 +334,7 @@ impl IterativeSearch { ::M: Copy, { if self.opts.max_quiescence_depth > 0 { - let mut moves = self.move_pool.new(); + let mut moves = self.move_pool.alloc(); if !E::G::generate_noisy_moves(s, &mut moves) { panic!("Quiescence search requested, but this game has not implemented generate_noisy_moves."); } @@ -353,7 +359,7 @@ impl IterativeSearch { return Some(E::evaluate(s)); } - let mut moves = self.move_pool.new(); + let mut moves = self.move_pool.alloc(); // Depth is only allowed to be >0 if this game supports noisy moves. 
E::G::generate_noisy_moves(s, &mut moves); if moves.is_empty() { @@ -456,7 +462,7 @@ impl IterativeSearch { return Some(value); } - let mut moves = self.move_pool.new(); + let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); self.total_generate_move_calls += 1; self.total_generated_moves += moves.len() as u64; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 7ca7252..9caca2f 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -47,7 +47,7 @@ impl Negamax { if depth == 0 { return E::evaluate(s); } - let mut moves = self.move_pool.new(); + let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for m in moves.iter() { @@ -72,7 +72,7 @@ where { fn choose_move(&mut self, s: &::S) -> Option<::M> { let mut best = WORST_EVAL; - let mut moves = self.move_pool.new(); + let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); // Randomly permute order that we look at the moves. // We'll pick the first best score from this list. 
diff --git a/src/strategies/random.rs b/src/strategies/random.rs index 7c71285..ea14807 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -14,6 +14,12 @@ impl Random { } } +impl Default for Random { + fn default() -> Self { + Random::new() + } +} + impl Strategy for Random where G::M: Copy, diff --git a/src/util.rs b/src/util.rs index 1871497..61cdcd6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -20,8 +20,8 @@ where let mut strategies: [&mut dyn interface::Strategy; 2] = [s1, s2]; let mut s = 0; while G::get_winner(&state).is_none() { - let ref mut strategy = strategies[s]; - match strategy.choose_move(&mut state) { + let strategy = &mut strategies[s]; + match strategy.choose_move(&state) { Some(m) => m.apply(&mut state), None => break, } @@ -45,8 +45,8 @@ impl Default for MovePool { } impl MovePool { - pub(crate) fn new(&mut self) -> Vec { - self.pool.pop().unwrap_or_else(|| Vec::new()) + pub(crate) fn alloc(&mut self) -> Vec { + self.pool.pop().unwrap_or_else(Vec::new) } pub(crate) fn free(&mut self, mut vec: Vec) { @@ -63,7 +63,7 @@ fn perft_recurse(pool: &mut MovePool, state: &mut G::S, depth: us // Apparently perft rules only count positions at the target depth. return 0; } - let mut moves = pool.new(); + let mut moves = pool.alloc(); G::generate_moves(state, &mut moves); let n = if depth == 1 { moves.len() as u64 From ed13cad99abff5a54e5946db20c7a0e60d7006db Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 2 Mar 2021 12:55:00 -0800 Subject: [PATCH 039/162] Remove unused .project file. 
--- .project | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .project diff --git a/.project b/.project deleted file mode 100644 index 84544fd..0000000 --- a/.project +++ /dev/null @@ -1,17 +0,0 @@ - - - minimax - - - - - - com.github.rustdt.ide.core.Builder - - - - - - com.github.rustdt.ide.core.nature - - From 517552775ac88cf33c41591eead6dcbcd414e501 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 3 Mar 2021 08:39:24 -0800 Subject: [PATCH 040/162] Simplify default implementation signature for generate_noisy_moves. --- src/interface.rs | 9 ++++----- src/strategies/iterative.rs | 17 +---------------- src/strategies/random.rs | 2 +- 3 files changed, 6 insertions(+), 22 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index 0d66872..ec956cc 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -85,8 +85,7 @@ pub trait Game: Sized { /// Generate moves at the given state. fn generate_moves(&Self::S, &mut Vec); - /// Optional interface to support strategies using quiescence - /// search. Return true when implemented. + /// Optional interface to support strategies using quiescence search. /// /// A "noisy" move is a threatening move that requires a response. /// @@ -95,9 +94,9 @@ pub trait Game: Sized { /// captures. Evaluating the board state after only the first capture can /// give a misleadingly high score. The solution is to continue the search /// among only noisy moves and find the score once the board state settles. - fn generate_noisy_moves(&Self::S, &mut Vec) -> bool { - // Default implementation indicates unimplemented. - false + fn generate_noisy_moves(&Self::S, &mut Vec) { + // When unimplemented, there are no noisy moves and search terminates + // immediately. 
} /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 90c5f32..d9bd971 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -162,7 +162,7 @@ impl IterativeOptions { impl Default for IterativeOptions { fn default() -> Self { - Self::new() + Self::new() } } @@ -329,19 +329,6 @@ impl IterativeSearch { &self.pv[..] } - fn check_noisy_search_capability(&mut self, s: &::S) - where - ::M: Copy, - { - if self.opts.max_quiescence_depth > 0 { - let mut moves = self.move_pool.alloc(); - if !E::G::generate_noisy_moves(s, &mut moves) { - panic!("Quiescence search requested, but this game has not implemented generate_noisy_moves."); - } - self.move_pool.free(moves); - } - } - // Negamax only among noisy moves. fn noisy_negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, @@ -360,7 +347,6 @@ impl IterativeSearch { } let mut moves = self.move_pool.alloc(); - // Depth is only allowed to be >0 if this game supports noisy moves. E::G::generate_noisy_moves(s, &mut moves); if moves.is_empty() { // Only quiet moves remain, return leaf evaluation. @@ -524,7 +510,6 @@ where ::M: Copy + Eq, { fn choose_move(&mut self, s: &::S) -> Option<::M> { - self.check_noisy_search_capability(s); self.transposition_table.advance_generation(); // Reset stats. self.nodes_explored.clear(); diff --git a/src/strategies/random.rs b/src/strategies/random.rs index ea14807..e1cbb1f 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -16,7 +16,7 @@ impl Random { impl Default for Random { fn default() -> Self { - Random::new() + Random::new() } } From 47d7f5852af8c65b6932daf5017d8cc642f80317 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 3 Mar 2021 09:25:05 -0800 Subject: [PATCH 041/162] Upgrade to rust 2018 edition. 
--- Cargo.toml | 1 + src/interface.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 301bd95..4c28336 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ version = "0.2.0" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" +edition = "2018" repository = "https://github.com/edre/minimax-rs" readme = "README.md" keywords = ["ai", "game", "minimax", "negamax"] diff --git a/src/interface.rs b/src/interface.rs index ec956cc..fac2142 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -31,9 +31,9 @@ pub trait Move { /// The type of game that the move affects. type G: Game; /// Change the state of `S` so that the move is applied. - fn apply(&self, &mut ::S); + fn apply(&self, state: &mut ::S); /// Revert the state of `S` so that the move is undone. - fn undo(&self, &mut ::S); + fn undo(&self, state: &mut ::S); } /// The result of playing a game until it finishes. @@ -83,7 +83,7 @@ pub trait Game: Sized { type M: Move; /// Generate moves at the given state. - fn generate_moves(&Self::S, &mut Vec); + fn generate_moves(state: &Self::S, moves: &mut Vec); /// Optional interface to support strategies using quiescence search. /// @@ -94,7 +94,7 @@ pub trait Game: Sized { /// captures. Evaluating the board state after only the first capture can /// give a misleadingly high score. The solution is to continue the search /// among only noisy moves and find the score once the board state settles. - fn generate_noisy_moves(&Self::S, &mut Vec) { + fn generate_noisy_moves(_state: &Self::S, _moves: &mut Vec) { // When unimplemented, there are no noisy moves and search terminates // immediately. 
} @@ -102,10 +102,10 @@ pub trait Game: Sized { /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if /// the state is non-terminal. - fn get_winner(&Self::S) -> Option; + fn get_winner(state: &Self::S) -> Option; } /// Defines a method of choosing a move for the current player. pub trait Strategy { - fn choose_move(&mut self, &G::S) -> Option; + fn choose_move(&mut self, state: &G::S) -> Option; } From 712b5516b9abc33994298eea928cf8bd66c9409c Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 3 Mar 2021 09:21:38 -0800 Subject: [PATCH 042/162] Make Evaluators stateful so they can be configured like Strategies. --- benches/negamax.rs | 5 +-- examples/connect4.rs | 14 +++++--- examples/ttt.rs | 10 ++++-- src/interface.rs | 2 +- src/strategies/iterative.rs | 11 +++--- src/strategies/negamax.rs | 10 +++--- src/strategies/random.rs | 1 - tests/strategies.rs | 68 ++++++++++++++++++++----------------- tests/ttt.rs | 6 ++-- 9 files changed, 71 insertions(+), 56 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index a0c4e8c..02f8760 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -10,7 +10,7 @@ use minimax::*; fn bench_negamax(b: &mut Bencher) { let board = connect4::Board::default(); b.iter(|| { - let mut s = Negamax::::with_max_depth(5); + let mut s = Negamax::new(connect4::BasicEvaluator::default(), 5); let m = s.choose_move(&board); assert!(m.is_some()); }); @@ -19,7 +19,8 @@ fn bench_negamax(b: &mut Bencher) { fn bench_iterative(b: &mut Bencher) { let board = connect4::Board::default(); b.iter(|| { - let mut s = IterativeSearch::::new( + let mut s = IterativeSearch::new( + connect4::BasicEvaluator::default(), IterativeOptions::new().with_table_byte_size(32_000).with_null_window_search(true), ); s.set_max_depth(5); diff --git a/examples/connect4.rs b/examples/connect4.rs index f11b7bd..2a2c44c 100644 --- a/examples/connect4.rs +++ 
b/examples/connect4.rs @@ -163,7 +163,7 @@ pub struct DumbEvaluator; impl minimax::Evaluator for DumbEvaluator { type G = Game; - fn evaluate(_: &Board) -> minimax::Evaluation { + fn evaluate(&self, _: &Board) -> minimax::Evaluation { 0 } } @@ -198,9 +198,15 @@ impl Board { pub struct BasicEvaluator; +impl Default for BasicEvaluator { + fn default() -> Self { + Self {} + } +} + impl minimax::Evaluator for BasicEvaluator { type G = Game; - fn evaluate(b: &Board) -> minimax::Evaluation { + fn evaluate(&self, b: &Board) -> minimax::Evaluation { let player_pieces = b.pieces_to_move; let opponent_pieces = b.pieces_just_moved(); let mut player_wins = b.find_fourth_moves(player_pieces); @@ -258,10 +264,10 @@ fn main() { let opts = IterativeOptions::new() .with_table_byte_size(1_000_000) .with_replacement_strategy(minimax::Replacement::DepthPreferred); - let mut iterative = IterativeSearch::::new(opts); + let mut iterative = IterativeSearch::new(BasicEvaluator::default(), opts); iterative.set_timeout(Duration::from_secs(1)); let mut strategies: [&mut dyn Strategy; 2] = - [&mut Negamax::::with_max_depth(8), &mut iterative]; + [&mut Negamax::new(DumbEvaluator {}, 8), &mut iterative]; let mut s = 0; while self::Game::get_winner(&b).is_none() { println!("{}", b); diff --git a/examples/ttt.rs b/examples/ttt.rs index 565ff0c..e282e27 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -178,10 +178,16 @@ impl minimax::Move for Place { pub struct Evaluator; +impl Default for Evaluator { + fn default() -> Self { + Self {} + } +} + impl minimax::Evaluator for Evaluator { type G = Game; // adapted from http://www.cs.olemiss.edu/~dwilkins/CSCI531/tic.c - fn evaluate(b: &Board) -> minimax::Evaluation { + fn evaluate(&self, b: &Board) -> minimax::Evaluation { let mut score = 0; // 3rd: check for doubles @@ -237,7 +243,7 @@ fn main() { let mut b = Board::default(); let mut strategies = - vec![Negamax::::with_max_depth(10), Negamax::::with_max_depth(10)]; + 
vec![Negamax::new(Evaluator::default(), 10), Negamax::new(Evaluator::default(), 10)]; let mut s = 0; while self::Game::get_winner(&b).is_none() { println!("{}", b); diff --git a/src/interface.rs b/src/interface.rs index fac2142..3902820 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -20,7 +20,7 @@ pub trait Evaluator { type G: Game; /// Evaluate the non-terminal state from the persective of the player to /// move next. - fn evaluate(s: &::S) -> Evaluation; + fn evaluate(&self, s: &::S) -> Evaluation; } /// Defines how a move affects the game state. diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index d9bd971..9f2275e 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -9,7 +9,6 @@ use super::super::util::*; use super::util::*; use std::cmp::{max, min}; -use std::marker::PhantomData; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::thread::{sleep, spawn}; @@ -211,7 +210,7 @@ pub struct IterativeSearch { transposition_table: TranspositionTable<<::G as Game>::M>, move_pool: MovePool<::M>, prev_value: Evaluation, - _eval: PhantomData, + eval: E, opts: IterativeOptions, @@ -233,7 +232,7 @@ pub struct IterativeSearch { } impl IterativeSearch { - pub fn new(opts: IterativeOptions) -> IterativeSearch { + pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); IterativeSearch { max_depth: 100, @@ -243,7 +242,7 @@ impl IterativeSearch { move_pool: MovePool::<_>::default(), prev_value: 0, opts, - _eval: PhantomData, + eval, actual_depth: 0, nodes_explored: Vec::new(), next_depth_nodes: 0, @@ -343,7 +342,7 @@ impl IterativeSearch { return Some(winner.evaluate()); } if depth == 0 { - return Some(E::evaluate(s)); + return Some(self.eval.evaluate(s)); } let mut moves = self.move_pool.alloc(); @@ -351,7 +350,7 @@ impl IterativeSearch { if moves.is_empty() { // Only quiet moves remain, return leaf evaluation. 
self.move_pool.free(moves); - return Some(E::evaluate(s)); + return Some(self.eval.evaluate(s)); } let mut best = WORST_EVAL; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 9caca2f..645d013 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -6,27 +6,25 @@ use super::super::interface::*; use super::super::util::*; use super::util::*; -use rand; use rand::Rng; use std::cmp::max; -use std::marker::PhantomData; pub struct Negamax { max_depth: usize, move_pool: MovePool<::M>, rng: rand::ThreadRng, prev_value: Evaluation, - _eval: PhantomData, + eval: E, } impl Negamax { - pub fn with_max_depth(depth: usize) -> Negamax { + pub fn new(eval: E, depth: usize) -> Negamax { Negamax { max_depth: depth, move_pool: MovePool::<_>::default(), rng: rand::thread_rng(), prev_value: 0, - _eval: PhantomData, + eval, } } @@ -45,7 +43,7 @@ impl Negamax { return winner.evaluate(); } if depth == 0 { - return E::evaluate(s); + return self.eval.evaluate(s); } let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); diff --git a/src/strategies/random.rs b/src/strategies/random.rs index e1cbb1f..ed831d2 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -1,7 +1,6 @@ //! A strategy that randomly chooses a move, for use in tests. 
use super::super::interface::*; -use rand; use rand::Rng; pub struct Random { diff --git a/tests/strategies.rs b/tests/strategies.rs index 682cdc8..8d6fbf8 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -12,41 +12,40 @@ mod connect4; use minimax::interface::*; use rand::Rng; use std::cmp::max; -use std::marker::PhantomData; - -fn negamax(s: &mut ::S, depth: usize) -> Evaluation -where - <::G as Game>::M: Copy, -{ - if let Some(winner) = E::G::get_winner(s) { - return winner.evaluate(); - } - if depth == 0 { - return E::evaluate(s); - } - let mut moves = Vec::new(); - E::G::generate_moves(s, &mut moves); - let mut best = WORST_EVAL; - for m in moves.iter() { - m.apply(s); - let value = -negamax::(s, depth - 1); - m.undo(s); - best = max(best, value); - } - best -} pub struct PlainNegamax { depth: usize, root_value: Evaluation, // All moves tied with the best valuation. best_moves: Vec<::M>, - _eval: PhantomData, + eval: E, } impl PlainNegamax { - pub fn new(depth: usize) -> PlainNegamax { - PlainNegamax { depth: depth, root_value: 0, best_moves: Vec::new(), _eval: PhantomData } + pub fn new(eval: E, depth: usize) -> PlainNegamax { + PlainNegamax { depth: depth, root_value: 0, best_moves: Vec::new(), eval } + } + + fn negamax(&self, s: &mut ::S, depth: usize) -> Evaluation + where + <::G as Game>::M: Copy, + { + if let Some(winner) = E::G::get_winner(s) { + return winner.evaluate(); + } + if depth == 0 { + return self.eval.evaluate(s); + } + let mut moves = Vec::new(); + E::G::generate_moves(s, &mut moves); + let mut best = WORST_EVAL; + for m in moves.iter() { + m.apply(s); + let value = -self.negamax(s, depth - 1); + m.undo(s); + best = max(best, value); + } + best } } @@ -64,7 +63,7 @@ where let mut s_clone = s.clone(); for &m in moves.iter() { m.apply(&mut s_clone); - let value = -negamax::(&mut s_clone, self.depth); + let value = -self.negamax(&mut s_clone, self.depth); m.undo(&mut s_clone); if value == best_value { self.best_moves.push(m); @@ 
-81,9 +80,15 @@ where struct RandomEvaluator; +impl Default for RandomEvaluator { + fn default() -> Self { + Self {} + } +} + impl minimax::Evaluator for RandomEvaluator { type G = connect4::Game; - fn evaluate(b: &connect4::Board) -> minimax::Evaluation { + fn evaluate(&self, b: &connect4::Board) -> minimax::Evaluation { // Scramble the game state to get a deterministically random Evaluation. let mut hash = b.pieces_just_moved().wrapping_mul(0xe512dc15f0da3dd1); hash = hash @@ -118,11 +123,11 @@ fn compare_plain_negamax() { for max_depth in 0..5 { let b = generate_random_state(10); - let mut plain_negamax = PlainNegamax::::new(max_depth); + let mut plain_negamax = PlainNegamax::new(RandomEvaluator::default(), max_depth); plain_negamax.choose_move(&b); let value = plain_negamax.root_value; - let mut negamax = minimax::Negamax::::with_max_depth(max_depth); + let mut negamax = minimax::Negamax::new(RandomEvaluator, max_depth); let negamax_move = negamax.choose_move(&b).unwrap(); let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); @@ -148,7 +153,8 @@ fn compare_plain_negamax() { .drain(..) .enumerate() { - let mut iterative = minimax::IterativeSearch::::new( + let mut iterative = minimax::IterativeSearch::new( + RandomEvaluator::default(), opt.with_table_byte_size(64000), ); iterative.set_max_depth(max_depth); diff --git a/tests/ttt.rs b/tests/ttt.rs index 8bc2a6d..db662fb 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -9,8 +9,8 @@ use minimax::Negamax; // Ensure that two players using negamax always results in a draw. 
#[test] fn test_ttt_negamax_always_draws() { - let mut s1 = Negamax::::with_max_depth(10); - let mut s2 = Negamax::::with_max_depth(10); + let mut s1 = Negamax::new(ttt::Evaluator::default(), 10); + let mut s2 = Negamax::new(ttt::Evaluator::default(), 10); for _ in 0..100 { assert_eq!(battle_royale(&mut s1, &mut s2), None); } @@ -21,7 +21,7 @@ fn test_ttt_negamax_always_draws() { #[test] fn test_ttt_negamax_vs_random_always_wins_or_draws() { use minimax::strategies::random::Random; - let mut s1 = Negamax::::with_max_depth(10); + let mut s1 = Negamax::new(ttt::Evaluator::default(), 10); let mut s2 = Random::new(); for _ in 0..100 { assert_ne!(battle_royale(&mut s1, &mut s2), Some(1)); From b20c524671a8d6a62a12006bfca5f5652e3bb6d1 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 3 Mar 2021 14:13:04 -0800 Subject: [PATCH 043/162] Replace custom test hash function with DefaultHasher. --- tests/strategies.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/strategies.rs b/tests/strategies.rs index 8d6fbf8..9dd7342 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -12,6 +12,8 @@ mod connect4; use minimax::interface::*; use rand::Rng; use std::cmp::max; +use std::collections::hash_map::DefaultHasher; +use std::hash::Hasher; pub struct PlainNegamax { depth: usize, @@ -89,13 +91,11 @@ impl Default for RandomEvaluator { impl minimax::Evaluator for RandomEvaluator { type G = connect4::Game; fn evaluate(&self, b: &connect4::Board) -> minimax::Evaluation { - // Scramble the game state to get a deterministically random Evaluation. - let mut hash = b.pieces_just_moved().wrapping_mul(0xe512dc15f0da3dd1); - hash = hash - .wrapping_add(hash >> 33) - .wrapping_add(b.pieces_to_move) - .wrapping_mul(0x18d9db91aa689617); - hash = hash.wrapping_add(hash >> 31); + // Hash the game state to get a deterministically random Evaluation. 
+ let mut hasher = DefaultHasher::new(); + hasher.write_u64(b.pieces_just_moved()); + hasher.write_u64(b.pieces_to_move); + let hash = hasher.finish(); // Use fewer bits so that we get some equal values. (hash as minimax::Evaluation) >> 25 } From 1a1af0dec8df3ab09c56198169fea8edfa22b8ff Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 6 Mar 2021 00:01:06 -0800 Subject: [PATCH 044/162] Add parallel strategy, based on rayon and Young Brothers Wait. There's still a lot of duplicated code from IterativeSearch, and I've done no performance measurements or optimizations, but it passes the integration test! --- .gitignore | 2 + Cargo.toml | 1 + src/lib.rs | 5 +- src/strategies/iterative.rs | 32 +-- src/strategies/mod.rs | 3 + src/strategies/table.rs | 142 ++++++++++++ src/strategies/util.rs | 40 ++++ src/strategies/ybw.rs | 429 ++++++++++++++++++++++++++++++++++++ tests/strategies.rs | 18 +- 9 files changed, 639 insertions(+), 33 deletions(-) create mode 100644 src/strategies/table.rs create mode 100644 src/strategies/ybw.rs diff --git a/.gitignore b/.gitignore index fa8d85a..5481574 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ Cargo.lock target + +**/*~ diff --git a/Cargo.toml b/Cargo.toml index 4c28336..691882f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ license = "MIT" [dependencies] rand = "0.4.*" +rayon = "^1.5" [dev-dependencies] bencher = "0.1.5" diff --git a/src/lib.rs b/src/lib.rs index dd19105..71e33a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,10 @@ pub mod interface; pub mod strategies; pub mod util; -pub use interface::{Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist}; +pub use interface::{ + Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist, BEST_EVAL, WORST_EVAL, +}; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; pub use strategies::negamax::Negamax; +pub use strategies::ybw::{ParallelYbw, YbwOptions}; pub use util::perft; diff --git 
a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 9f2275e..e0c110a 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -6,26 +6,14 @@ use super::super::interface::*; use super::super::util::*; +use super::table::*; use super::util::*; use std::cmp::{max, min}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use std::thread::{sleep, spawn}; use std::time::{Duration, Instant}; -fn timeout_signal(dur: Duration) -> Arc { - // Theoretically we could include an async runtime to do this and use - // fewer threads, but the stdlib implementation is only a few lines... - let signal = Arc::new(AtomicBool::new(false)); - let signal2 = signal.clone(); - spawn(move || { - sleep(dur); - signal2.store(true, Ordering::Relaxed); - }); - signal -} - #[derive(Copy, Clone, Debug, Eq, PartialEq)] /// Strategies for when to overwrite entries in the transition table. pub enum Replacement { @@ -35,24 +23,6 @@ pub enum Replacement { // TODO: Bucket(size) } -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum EntryFlag { - Exact, - Upperbound, - Lowerbound, -} - -// TODO: Optimize size. Ideally 16 bytes or less. -#[derive(Copy, Clone)] -struct Entry { - hash: u64, - value: Evaluation, - depth: u8, - flag: EntryFlag, - generation: u8, - best_move: Option, -} - struct TranspositionTable { table: Vec>, mask: usize, diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index acb7ee9..1505756 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -3,4 +3,7 @@ pub mod iterative; pub mod negamax; pub mod random; +pub mod ybw; + +mod table; mod util; diff --git a/src/strategies/table.rs b/src/strategies/table.rs new file mode 100644 index 0000000..c8777cc --- /dev/null +++ b/src/strategies/table.rs @@ -0,0 +1,142 @@ +use crate::interface::*; +use std::cmp::{max, min}; +use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::Mutex; + +// Common transposition table stuff. 
+ +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(super) enum EntryFlag { + Exact, + Upperbound, + Lowerbound, +} + +// TODO: Optimize size. Ideally 16 bytes or less. +#[derive(Copy, Clone)] +pub(super) struct Entry { + pub(super) hash: u64, + pub(super) value: Evaluation, + pub(super) depth: u8, + pub(super) flag: EntryFlag, + pub(super) generation: u8, + pub(super) best_move: Option, +} + +#[test] +fn test_entry_size() { + // TODO + //assert!(std::mem::size_of::>() <= 20); + //assert!(std::mem::size_of::>>() <= 20); +} + +// It would be nice to unify most of the implementation of the single-threaded +// and concurrent tables, but the methods need different signatures. +pub(super) struct ConcurrentTable { + table: Vec>>, + mask: usize, + // Incremented for each iterative deepening run. + // Values from old generations are always overwritten. + generation: AtomicU8, +} + +impl ConcurrentTable { + pub(super) fn new(table_byte_size: usize) -> Self { + let size = (table_byte_size / std::mem::size_of::>>()).next_power_of_two(); + let mask = (size - 1) & !1; + let mut table = Vec::with_capacity(size); + for _ in 0..size { + table.push(Mutex::new(Entry:: { + hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + })); + } + Self { table, mask, generation: AtomicU8::new(0) } + } + + pub(super) fn advance_generation(&self) { + self.generation.fetch_add(1, Ordering::SeqCst); + } +} + +impl ConcurrentTable +where + M: Copy, +{ + // Using two-tier table, look in the two adjacent slots + pub(super) fn lookup(&self, hash: u64) -> Option> { + let index = (hash as usize) & self.mask; + for i in index..index + 2 { + let entry = self.table[i].lock().unwrap(); + if hash == entry.hash { + return Some(*entry); + } + } + None + } + + fn store(&self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + let table_gen = self.generation.load(Ordering::Relaxed); + // index points to the first of a pair of entries, the 
depth-preferred entry and the always-replace entry. + let index = (hash as usize) & self.mask; + let new_entry = + Entry { hash, value, depth, flag, generation: table_gen, best_move: Some(best_move) }; + { + let mut entry = self.table[index].lock().unwrap(); + if entry.generation != table_gen || entry.depth <= depth { + *entry = new_entry; + return; + } + } + // Otherwise, always overwrite second entry. + *self.table[index + 1].lock().unwrap() = new_entry; + } + + // Check and update negamax state based on any transposition table hit. + // Returns Some(value) on an exact match. + // Returns None, updating mutable arguments, if Negamax should continue to explore this node. + pub(super) fn check( + &self, hash: u64, depth: u8, good_move: &mut Option, alpha: &mut Evaluation, + beta: &mut Evaluation, + ) -> Option { + if let Some(entry) = self.lookup(hash) { + *good_move = entry.best_move; + if entry.depth >= depth { + match entry.flag { + EntryFlag::Exact => { + return Some(entry.value); + } + EntryFlag::Lowerbound => { + *alpha = max(*alpha, entry.value); + } + EntryFlag::Upperbound => { + *beta = min(*beta, entry.value); + } + } + if *alpha >= *beta { + return Some(entry.value); + } + } + } + None + } + + // Update table based on negamax results. 
+ pub(super) fn update( + &self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, best: Evaluation, + best_move: M, + ) { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.store(hash, best, depth, flag, best_move); + } +} diff --git a/src/strategies/util.rs b/src/strategies/util.rs index b21778b..9f86195 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -1,5 +1,10 @@ use super::super::interface::*; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread::{sleep, spawn}; +use std::time::Duration; + // For values near winning and losing values, push them slightly closer to zero. // A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). // A loss in 5 moves (WORST+5) will be chosen over a loss in 3 moves (WORST+3). @@ -23,3 +28,38 @@ pub(super) fn unclamp_value(value: Evaluation) -> Evaluation { value } } + +pub(super) fn timeout_signal(dur: Duration) -> Arc { + // Theoretically we could include an async runtime to do this and use + // fewer threads, but the stdlib implementation is only a few lines... + let signal = Arc::new(AtomicBool::new(false)); + let signal2 = signal.clone(); + spawn(move || { + sleep(dur); + signal2.store(true, Ordering::Relaxed); + }); + signal +} + +// This exists to be wrapped in a mutex, because it didn't work when I tried a tuple.' 
+pub(super) struct ValueMove { + value: Evaluation, + m: M, +} + +impl ValueMove { + pub(super) fn new(value: Evaluation, m: M) -> Self { + Self { value, m } + } + + pub(super) fn max(&mut self, value: Evaluation, m: M) { + if value > self.value { + self.value = value; + self.m = m; + } + } + + pub(super) fn into_inner(self) -> (Evaluation, M) { + (self.value, self.m) + } +} diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs new file mode 100644 index 0000000..c1b52d3 --- /dev/null +++ b/src/strategies/ybw.rs @@ -0,0 +1,429 @@ +//! An implementation of iterative deeping, with each iteration executed in parallel. +//! +//! This implementation uses the Young Brothers Wait Concept, which evaluates +//! the best guess move serially first, then parallelizes all other moves +//! using rayon. This tries to reduce redundant computation at the expense of +//! more board state clones and slightly more thread synchronization. + +extern crate rayon; + +use super::super::interface::*; +use super::table::*; +use super::util::*; + +use rayon::prelude::*; +use std::cmp::max; +use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +/// Options to use for the parallel search engine. +#[derive(Clone, Copy)] +pub struct YbwOptions { + table_byte_size: usize, + null_window_search: bool, + step_increment: u8, + max_quiescence_depth: u8, + serial_cutoff_depth: u8, +} + +impl YbwOptions { + pub fn new() -> Self { + YbwOptions { + table_byte_size: 32_000_000, + null_window_search: true, + step_increment: 1, + max_quiescence_depth: 0, + serial_cutoff_depth: 1, + } + } +} + +impl Default for YbwOptions { + fn default() -> Self { + Self::new() + } +} + +impl YbwOptions { + /// Approximately how large the transposition table should be in memory. 
+ pub fn with_table_byte_size(mut self, size: usize) -> Self { + self.table_byte_size = size; + self + } + + /// Whether to add null-window searches to try to prune branches that are + /// probably worse than those already found. Also known as principal + /// variation search. + pub fn with_null_window_search(mut self, null: bool) -> Self { + self.null_window_search = null; + self + } + + /// Increment the depth by two between iterations. + pub fn with_double_step_increment(mut self) -> Self { + self.step_increment = 2; + self + } + + /// Enable [quiescence + /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves + /// of the search tree. The Game must implement `generate_noisy_moves` + /// for the search to know when the state has become "quiet". + pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { + self.max_quiescence_depth = depth; + self + } +} + +pub struct ParallelYbw { + max_depth: usize, + max_time: Duration, + timeout: Arc, + table: ConcurrentTable<<::G as Game>::M>, + //move_pool: MovePool<::M>, + prev_value: Evaluation, + eval: E, + + opts: YbwOptions, + + // Runtime stats for the last move generated. + + // Maximum depth used to produce the move. + actual_depth: u8, + // Nodes explored at each depth. + nodes_explored: Vec, + // Nodes explored past this depth, and thus only useful for filling TT for + // next choose_move. + next_depth_nodes: u64, + // For computing the average branching factor. 
+ total_generate_move_calls: u64, + total_generated_moves: u64, + table_hits: usize, + pv: Vec<::M>, + wall_time: Duration, +} + +impl ParallelYbw { + pub fn new(eval: E, opts: YbwOptions) -> ParallelYbw { + let table = ConcurrentTable::new(opts.table_byte_size); + ParallelYbw { + max_depth: 100, + max_time: Duration::from_secs(5), + timeout: Arc::new(AtomicBool::new(false)), + table, + //move_pool: MovePool::<_>::default(), + prev_value: 0, + opts, + eval, + actual_depth: 0, + nodes_explored: Vec::new(), + next_depth_nodes: 0, + total_generate_move_calls: 0, + total_generated_moves: 0, + table_hits: 0, + pv: Vec::new(), + wall_time: Duration::default(), + } + } + + /// Set the maximum depth to search. Disables the timeout. + /// This can be changed between moves while reusing the transposition table. + pub fn set_max_depth(&mut self, depth: usize) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + /// Set the maximum time to compute the best move. When the timeout is + /// hit, it returns the best move found of the previous full + /// iteration. Unlimited max depth. + pub fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 100; + } + + /// Return a human-readable summary of the last move generation. + pub fn stats(&self) -> String { + let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); + let mean_branching_factor = + self.total_generated_moves as f64 / self.total_generate_move_calls as f64; + let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) + .powf((self.actual_depth as f64 + 1.0).recip()); + let throughput = + (total_nodes_explored + self.next_depth_nodes) as f64 / self.wall_time.as_secs_f64(); + format!("Explored {} nodes to depth {}. 
MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} transposition table hits.\n{} nodes/sec", + total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, + self.next_depth_nodes, self.table_hits, throughput as usize) + } + + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + // After finishing a search, populate the principal variation as deep as + // the table remembers it. + fn populate_pv(&mut self, s: &mut ::S, mut depth: u8) + where + ::S: Zobrist, + ::M: Copy, + { + self.pv.clear(); + let mut hash = s.zobrist_hash(); + while let Some(entry) = self.table.lookup(hash) { + // The principal variation should only have exact nodes, as other + // node types are from cutoffs where the node is proven to be + // worse than a previously explored one. + // + // Sometimes, it takes multiple rounds of narrowing bounds for the + // value to be exact, and we can't guarantee that the table entry + // will remain in the table between the searches that find + // equivalent upper and lower bounds. + let m = entry.best_move.unwrap(); + self.pv.push(m); + m.apply(s); + hash = s.zobrist_hash(); + // Prevent cyclical PVs from being infinitely long. + if depth == 0 { + break; + } + depth -= 1; + } + // Restore state. + for m in self.pv.iter().rev() { + m.undo(s); + } + } + + /// Return what the engine considered to be the best sequence of moves + /// from both sides. + pub fn principal_variation(&self) -> &[::M] { + &self.pv[..] + } + + // Negamax only among noisy moves. 
+ fn noisy_negamax( + &self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, + ) -> Option + where + ::M: Copy, + { + if self.timeout.load(Ordering::Relaxed) { + return None; + } + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + if depth == 0 { + return Some(self.eval.evaluate(s)); + } + + //let mut moves = self.move_pool.alloc(); + let mut moves = Vec::new(); + E::G::generate_noisy_moves(s, &mut moves); + if moves.is_empty() { + // Only quiet moves remain, return leaf evaluation. + //self.move_pool.free(moves); + return Some(self.eval.evaluate(s)); + } + + let mut best = WORST_EVAL; + for m in moves.iter() { + m.apply(s); + let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; + m.undo(s); + best = max(best, value); + alpha = max(alpha, value); + if alpha >= beta { + break; + } + } + //self.move_pool.free(moves); + Some(best) + } + + // Recursively compute negamax on the game state. Returns None if it hits the timeout. + fn negamax( + &self, s: &mut ::S, depth: u8, mut alpha: Evaluation, mut beta: Evaluation, + ) -> Option + where + ::S: Clone + Zobrist + Send + Sync, + ::M: Copy + Eq + Send + Sync, + E: Sync, + { + if self.timeout.load(Ordering::Relaxed) { + return None; + } + + //self.next_depth_nodes += 1; + + if depth == 0 { + // Evaluate quiescence search on leaf nodes. + // Will just return the node's evaluation if quiescence search is disabled. 
+ return self.noisy_negamax(s, self.opts.max_quiescence_depth, alpha, beta); + } + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + + let alpha_orig = alpha; + let hash = s.zobrist_hash(); + let mut good_move = None; + if let Some(value) = self.table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) { + return Some(value); + } + + //let mut moves = self.move_pool.alloc(); + let mut moves = Vec::new(); + E::G::generate_moves(s, &mut moves); + //self.total_generate_move_calls += 1; + //self.total_generated_moves += moves.len() as u64; + if moves.is_empty() { + //self.move_pool.free(moves); + return Some(WORST_EVAL); + } + let first_move = good_move.unwrap_or(moves[0]); + + // Evaluate first move serially. + first_move.apply(s); + let initial_value = -self.negamax(s, depth - 1, -beta, -alpha)?; + first_move.undo(s); + alpha = max(alpha, initial_value); + let (best, best_move) = if alpha >= beta { + // Skip search + (initial_value, first_move) + } else if self.opts.serial_cutoff_depth >= depth { + // Serial search + let mut best = initial_value; + let mut best_move = first_move; + let mut null_window = false; + for &m in moves.iter() { + if m == first_move { + continue; + } + m.apply(s); + let value = if null_window { + let probe = -self.negamax(s, depth - 1, -alpha - 1, -alpha)?; + if probe > alpha && probe < beta { + // Full search fallback. + -self.negamax(s, depth - 1, -beta, -probe)? + } else { + probe + } + } else { + -self.negamax(s, depth - 1, -beta, -alpha)? + }; + m.undo(s); + if value > best { + best = value; + best_move = m; + } + if value > alpha { + alpha = value; + // Now that we've found a good move, assume following moves + // are worse, and seek to cull them without full evaluation. 
+ null_window = self.opts.null_window_search; + } + if alpha >= beta { + break; + } + } + (best, best_move) + } else { + let alpha = AtomicI32::new(alpha); + let best_move = Mutex::new(ValueMove::new(initial_value, first_move)); + // Parallel search + let result = moves.par_iter().with_max_len(1).try_for_each(|&m| -> Option<()> { + // Check to see if we're cancelled by another branch. + let initial_alpha = alpha.load(Ordering::SeqCst); + if initial_alpha >= beta { + return None; + } + + let mut state = s.clone(); + m.apply(&mut state); + let value = if self.opts.null_window_search && initial_alpha > alpha_orig { + // TODO: send reference to alpha as neg_beta to children. + let probe = + -self.negamax(&mut state, depth - 1, -initial_alpha - 1, -initial_alpha)?; + if probe > initial_alpha && probe < beta { + // Check again that we're not cancelled. + if alpha.load(Ordering::SeqCst) >= beta { + return None; + } + // Full search fallback. + -self.negamax(&mut state, depth - 1, -beta, -probe)? + } else { + probe + } + } else { + -self.negamax(&mut state, depth - 1, -beta, -initial_alpha)? + }; + + alpha.fetch_max(value, Ordering::SeqCst); + let mut bests = best_move.lock().unwrap(); + bests.max(value, m); + Some(()) + }); + if result.is_none() { + // Check for timeout. + if self.timeout.load(Ordering::Relaxed) { + return None; + } + } + best_move.into_inner().unwrap().into_inner() + }; + + self.table.update(hash, alpha_orig, beta, depth, best, best_move); + //self.move_pool.free(moves); + Some(clamp_value(best)) + } +} + +impl Strategy for ParallelYbw +where + ::S: Clone + Zobrist + Send + Sync, + ::M: Copy + Eq + Send + Sync, + E: Sync, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + self.table.advance_generation(); + // Reset stats. 
+ self.nodes_explored.clear(); + self.next_depth_nodes = 0; + self.total_generate_move_calls = 0; + self.total_generated_moves = 0; + self.actual_depth = 0; + self.table_hits = 0; + let start_time = Instant::now(); + // Start timer if configured. + self.timeout = if self.max_time == Duration::new(0, 0) { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(self.max_time) + }; + + let root_hash = s.zobrist_hash(); + let mut s_clone = s.clone(); + let mut best_move = None; + + let mut depth = self.max_depth as u8 % self.opts.step_increment; + while depth <= self.max_depth as u8 { + if self.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { + // Timeout. Return the best move from the previous depth. + break; + } + let entry = self.table.lookup(root_hash).unwrap(); + best_move = entry.best_move; + + self.actual_depth = max(self.actual_depth, depth); + self.nodes_explored.push(self.next_depth_nodes); + self.prev_value = entry.value; + self.next_depth_nodes = 0; + depth += self.opts.step_increment; + self.populate_pv(&mut s_clone, depth + 1); + } + self.wall_time = start_time.elapsed(); + best_move + } +} diff --git a/tests/strategies.rs b/tests/strategies.rs index 9dd7342..01847a7 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -9,7 +9,7 @@ extern crate rand; #[path = "../examples/connect4.rs"] mod connect4; -use minimax::interface::*; +use minimax::*; use rand::Rng; use std::cmp::max; use std::collections::hash_map::DefaultHasher; @@ -173,6 +173,22 @@ fn compare_plain_negamax() { b ); } + + let mut parallel = ParallelYbw::new( + RandomEvaluator::default(), + YbwOptions::default().with_table_byte_size(64000), + ); + parallel.set_max_depth(max_depth); + let parallel_move = parallel.choose_move(&b).unwrap(); + let parallel_value = parallel.root_value(); + assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); + assert!( + plain_negamax.best_moves.contains(¶llel_move), + "bad move={:?}\nsearch depth={}\n{}", + 
parallel_move, + max_depth, + b + ); } } } From d218c17789ea5169ba6b0dbecb2ef3865fda6a0b Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 6 Mar 2021 17:53:02 -0800 Subject: [PATCH 045/162] Add deeper correctness test. --- tests/strategies.rs | 48 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/tests/strategies.rs b/tests/strategies.rs index 01847a7..1ff4501 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -127,7 +127,7 @@ fn compare_plain_negamax() { plain_negamax.choose_move(&b); let value = plain_negamax.root_value; - let mut negamax = minimax::Negamax::new(RandomEvaluator, max_depth); + let mut negamax = Negamax::new(RandomEvaluator, max_depth); let negamax_move = negamax.choose_move(&b).unwrap(); let negamax_value = negamax.root_value(); assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); @@ -141,19 +141,18 @@ fn compare_plain_negamax() { // Sampling of the configuration space. for (option_num, opt) in vec![ - minimax::IterativeOptions::new() - .with_replacement_strategy(minimax::Replacement::DepthPreferred) + IterativeOptions::new() + .with_replacement_strategy(Replacement::DepthPreferred) .with_null_window_search(true), - minimax::IterativeOptions::new() - .with_replacement_strategy(minimax::Replacement::Always) + IterativeOptions::new() + .with_replacement_strategy(Replacement::Always) .with_double_step_increment(), - minimax::IterativeOptions::new() - .with_replacement_strategy(minimax::Replacement::TwoTier), + IterativeOptions::new().with_replacement_strategy(Replacement::TwoTier), ] .drain(..) .enumerate() { - let mut iterative = minimax::IterativeSearch::new( + let mut iterative = IterativeSearch::new( RandomEvaluator::default(), opt.with_table_byte_size(64000), ); @@ -192,3 +191,36 @@ fn compare_plain_negamax() { } } } + +// The same test, but a deeper tree (without plain negamax) to try to expose +// more parallelism in the parallel strategies. 
+#[test] +fn compare_deep_negamax() { + for _ in 0..10 { + for max_depth in 0..9 { + let b = generate_random_state(10); + + let mut negamax = Negamax::new(RandomEvaluator, max_depth); + negamax.choose_move(&b).unwrap(); + let value = negamax.root_value(); + + let mut iterative = IterativeSearch::new( + RandomEvaluator::default(), + IterativeOptions::new().with_table_byte_size(64000), + ); + iterative.set_max_depth(max_depth); + iterative.choose_move(&b).unwrap(); + let iterative_value = iterative.root_value(); + assert_eq!(value, iterative_value, "search depth={}\n{}", max_depth, b); + + let mut parallel = ParallelYbw::new( + RandomEvaluator::default(), + YbwOptions::default().with_table_byte_size(64000), + ); + parallel.set_max_depth(max_depth); + parallel.choose_move(&b).unwrap(); + let parallel_value = parallel.root_value(); + assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); + } + } +} From b964f066d566d1e158626bff296ec2fe0402183a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 7 Mar 2021 21:13:45 -0800 Subject: [PATCH 046/162] Use connect4 example as deterministic macrobenchmark. 
--- examples/connect4.rs | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 2a2c44c..3e37c38 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -5,7 +5,6 @@ extern crate minimax; use std::default::Default; use std::fmt::{Display, Formatter, Result}; -use std::time::Duration; #[derive(Clone)] pub struct Board { @@ -252,7 +251,7 @@ impl minimax::Evaluator for BasicEvaluator { fn main() { use minimax::{perft, Game, Move, Strategy}; - use minimax::{IterativeOptions, IterativeSearch, Negamax}; + use minimax::{IterativeOptions, IterativeSearch, Negamax, ParallelYbw, YbwOptions}; let mut b = Board::default(); @@ -261,13 +260,27 @@ fn main() { return; } - let opts = IterativeOptions::new() - .with_table_byte_size(1_000_000) - .with_replacement_strategy(minimax::Replacement::DepthPreferred); + let mut dumb = IterativeSearch::new( + BasicEvaluator::default(), + IterativeOptions::new().with_double_step_increment(), + ); + dumb.set_max_depth(8); + + let opts = IterativeOptions::new().with_table_byte_size(16_000_000); let mut iterative = IterativeSearch::new(BasicEvaluator::default(), opts); - iterative.set_timeout(Duration::from_secs(1)); - let mut strategies: [&mut dyn Strategy; 2] = - [&mut Negamax::new(DumbEvaluator {}, 8), &mut iterative]; + iterative.set_max_depth(12); + let mut parallel = ParallelYbw::new( + BasicEvaluator::default(), + YbwOptions::new().with_table_byte_size(16_000_000), + ); + parallel.set_max_depth(12); + let mut strategies: [&mut dyn Strategy; 3] = + [&mut dumb, &mut iterative, &mut parallel]; + + if std::env::args().any(|arg| arg == "parallel") { + strategies.swap(1, 2); + } + let mut s = 0; while self::Game::get_winner(&b).is_none() { println!("{}", b); From 5aff5e0f0c142376f470d3aeb5494d92632e7a39 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 8 Mar 2021 20:54:36 -0800 Subject: [PATCH 047/162] Switch to parking_lot::Mutex for 
slightly better perf and entry size. --- Cargo.toml | 1 + src/strategies/table.rs | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 691882f..37c125e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ keywords = ["ai", "game", "minimax", "negamax"] license = "MIT" [dependencies] +parking_lot = "^0.11" rand = "0.4.*" rayon = "^1.5" diff --git a/src/strategies/table.rs b/src/strategies/table.rs index c8777cc..79c291e 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -1,7 +1,9 @@ +extern crate parking_lot; + use crate::interface::*; +use parking_lot::Mutex; use std::cmp::{max, min}; use std::sync::atomic::{AtomicU8, Ordering}; -use std::sync::Mutex; // Common transposition table stuff. @@ -25,9 +27,9 @@ pub(super) struct Entry { #[test] fn test_entry_size() { - // TODO - //assert!(std::mem::size_of::>() <= 20); - //assert!(std::mem::size_of::>>() <= 20); + // TODO: ratchet down + assert!(std::mem::size_of::>() <= 24); + assert!(std::mem::size_of::>>() <= 32); } // It would be nice to unify most of the implementation of the single-threaded @@ -71,7 +73,7 @@ where pub(super) fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; for i in index..index + 2 { - let entry = self.table[i].lock().unwrap(); + let entry = self.table[i].lock(); if hash == entry.hash { return Some(*entry); } @@ -86,14 +88,14 @@ where let new_entry = Entry { hash, value, depth, flag, generation: table_gen, best_move: Some(best_move) }; { - let mut entry = self.table[index].lock().unwrap(); + let mut entry = self.table[index].lock(); if entry.generation != table_gen || entry.depth <= depth { *entry = new_entry; return; } } // Otherwise, always overwrite second entry. - *self.table[index + 1].lock().unwrap() = new_entry; + *self.table[index + 1].lock() = new_entry; } // Check and update negamax state based on any transposition table hit. 
From a72f338cdd8ce7cff37cf3d271f5b75fc81b1080 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 8 Mar 2021 20:58:31 -0800 Subject: [PATCH 048/162] Update regular benchmark. Mostly measures overhead as it's not long enough to get much multithreading. --- benches/negamax.rs | 15 ++++++++++++++- examples/connect4.rs | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index 02f8760..c9ea242 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -29,5 +29,18 @@ fn bench_iterative(b: &mut Bencher) { }); } -benchmark_group!(benches, bench_negamax, bench_iterative); +fn bench_parallel(b: &mut Bencher) { + let board = connect4::Board::default(); + b.iter(|| { + let mut s = ParallelYbw::new( + connect4::BasicEvaluator::default(), + YbwOptions::new().with_table_byte_size(32_000).with_null_window_search(true), + ); + s.set_max_depth(5); + let m = s.choose_move(&board); + assert!(m.is_some()); + }); +} + +benchmark_group!(benches, bench_negamax, bench_iterative, bench_parallel); benchmark_main!(benches); diff --git a/examples/connect4.rs b/examples/connect4.rs index 3e37c38..26fb802 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -251,7 +251,7 @@ impl minimax::Evaluator for BasicEvaluator { fn main() { use minimax::{perft, Game, Move, Strategy}; - use minimax::{IterativeOptions, IterativeSearch, Negamax, ParallelYbw, YbwOptions}; + use minimax::{IterativeOptions, IterativeSearch, ParallelYbw, YbwOptions}; let mut b = Board::default(); From 0da96f1536f09a75848046005a0b649bdfad5506 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 8 Mar 2021 21:20:35 -0800 Subject: [PATCH 049/162] Refactor single-thread transposition table into trait. 
--- src/strategies/iterative.rs | 71 ++++++++----------------------------- src/strategies/table.rs | 51 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 56 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index e0c110a..eb13dd5 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -9,7 +9,7 @@ use super::super::util::*; use super::table::*; use super::util::*; -use std::cmp::{max, min}; +use std::cmp::max; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -32,7 +32,7 @@ struct TranspositionTable { strategy: Replacement, } -impl TranspositionTable { +impl TranspositionTable { fn new(table_byte_size: usize, strategy: Replacement) -> Self { let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); let mask = if strategy == Replacement::TwoTier { (size - 1) & !1 } else { size - 1 }; @@ -53,16 +53,18 @@ impl TranspositionTable { fn advance_generation(&mut self) { self.generation = self.generation.wrapping_add(1); } +} - fn lookup(&self, hash: u64) -> Option<&Entry> { +impl Table for TranspositionTable { + fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = &self.table[index]; if hash == entry.hash { - Some(entry) + Some(*entry) } else if self.strategy == Replacement::TwoTier { let entry = &self.table[index + 1]; if hash == entry.hash { - Some(entry) + Some(*entry) } else { None } @@ -201,7 +203,10 @@ pub struct IterativeSearch { wall_time: Duration, } -impl IterativeSearch { +impl IterativeSearch +where + ::M: Copy, +{ pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); IterativeSearch { @@ -338,54 +343,6 @@ impl IterativeSearch { Some(best) } - // Check and update negamax state based on any transposition table hit. 
- #[inline] - fn table_check( - &mut self, hash: u64, depth: u8, good_move: &mut Option<::M>, - alpha: &mut Evaluation, beta: &mut Evaluation, - ) -> Option - where - ::M: Copy, - { - if let Some(entry) = self.transposition_table.lookup(hash) { - *good_move = entry.best_move; - self.table_hits += 1; - if entry.depth >= depth { - match entry.flag { - EntryFlag::Exact => { - return Some(entry.value); - } - EntryFlag::Lowerbound => { - *alpha = max(*alpha, entry.value); - } - EntryFlag::Upperbound => { - *beta = min(*beta, entry.value); - } - } - if *alpha >= *beta { - return Some(entry.value); - } - } - } - None - } - - // Update table based on negamax results. - #[inline(always)] - fn table_update( - &mut self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, - best: Evaluation, best_move: ::M, - ) { - let flag = if best <= alpha_orig { - EntryFlag::Upperbound - } else if best >= beta { - EntryFlag::Lowerbound - } else { - EntryFlag::Exact - }; - self.transposition_table.store(hash, best, depth, flag, best_move); - } - // Recursively compute negamax on the game state. Returns None if it hits the timeout. 
fn negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, @@ -413,7 +370,9 @@ impl IterativeSearch { let alpha_orig = alpha; let hash = s.zobrist_hash(); let mut good_move = None; - if let Some(value) = self.table_check(hash, depth, &mut good_move, &mut alpha, &mut beta) { + if let Some(value) = + self.transposition_table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) + { return Some(value); } @@ -467,7 +426,7 @@ impl IterativeSearch { } } - self.table_update(hash, alpha_orig, beta, depth, best, best_move); + self.transposition_table.update(hash, alpha_orig, beta, depth, best, best_move); self.move_pool.free(moves); Some(clamp_value(best)) } diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 79c291e..e35bae0 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -32,6 +32,57 @@ fn test_entry_size() { assert!(std::mem::size_of::>>() <= 32); } +// A trait for a transposition table. The methods are mutual exclusion, but +// the idea is that an implementation can wrap a shared concurrent table. +pub(super) trait Table { + fn lookup(&self, hash: u64) -> Option>; + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M); + + // Check and update negamax state based on any transposition table hit. + // Returns Some(value) on an exact match. + // Returns None, updating mutable arguments, if Negamax should continue to explore this node. 
+ fn check( + &self, hash: u64, depth: u8, good_move: &mut Option, alpha: &mut Evaluation, + beta: &mut Evaluation, + ) -> Option { + if let Some(entry) = self.lookup(hash) { + *good_move = entry.best_move; + if entry.depth >= depth { + match entry.flag { + EntryFlag::Exact => { + return Some(entry.value); + } + EntryFlag::Lowerbound => { + *alpha = max(*alpha, entry.value); + } + EntryFlag::Upperbound => { + *beta = min(*beta, entry.value); + } + } + if *alpha >= *beta { + return Some(entry.value); + } + } + } + None + } + + // Update table based on negamax results. + fn update( + &mut self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, + best: Evaluation, best_move: M, + ) { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.store(hash, best, depth, flag, best_move); + } +} + // It would be nice to unify most of the implementation of the single-threaded // and concurrent tables, but the methods need different signatures. pub(super) struct ConcurrentTable { From 415220de12cd7d6b30528885a279fc4d45651f78 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 8 Mar 2021 22:13:06 -0800 Subject: [PATCH 050/162] Bring ConcurrentTable into the Table trait --- src/strategies/table.rs | 58 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/src/strategies/table.rs b/src/strategies/table.rs index e35bae0..97e00ee 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -4,6 +4,7 @@ use crate::interface::*; use parking_lot::Mutex; use std::cmp::{max, min}; use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::Arc; // Common transposition table stuff. 
@@ -116,12 +117,30 @@ impl ConcurrentTable { } } +impl Table for ConcurrentTable { + fn lookup(&self, hash: u64) -> Option> { + self.concurrent_lookup(hash) + } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } +} + +impl Table for Arc> { + fn lookup(&self, hash: u64) -> Option> { + self.concurrent_lookup(hash) + } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } +} + impl ConcurrentTable where M: Copy, { // Using two-tier table, look in the two adjacent slots - pub(super) fn lookup(&self, hash: u64) -> Option> { + pub(super) fn concurrent_lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; for i in index..index + 2 { let entry = self.table[i].lock(); @@ -132,7 +151,9 @@ where None } - fn store(&self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + fn concurrent_store( + &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, + ) { let table_gen = self.generation.load(Ordering::Relaxed); // index points to the first of a pair of entries, the depth-preferred entry and the always-replace entry. let index = (hash as usize) & self.mask; @@ -149,37 +170,8 @@ where *self.table[index + 1].lock() = new_entry; } - // Check and update negamax state based on any transposition table hit. - // Returns Some(value) on an exact match. - // Returns None, updating mutable arguments, if Negamax should continue to explore this node. 
- pub(super) fn check( - &self, hash: u64, depth: u8, good_move: &mut Option, alpha: &mut Evaluation, - beta: &mut Evaluation, - ) -> Option { - if let Some(entry) = self.lookup(hash) { - *good_move = entry.best_move; - if entry.depth >= depth { - match entry.flag { - EntryFlag::Exact => { - return Some(entry.value); - } - EntryFlag::Lowerbound => { - *alpha = max(*alpha, entry.value); - } - EntryFlag::Upperbound => { - *beta = min(*beta, entry.value); - } - } - if *alpha >= *beta { - return Some(entry.value); - } - } - } - None - } - // Update table based on negamax results. - pub(super) fn update( + pub(super) fn concurrent_update( &self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, best: Evaluation, best_move: M, ) { @@ -190,6 +182,6 @@ where } else { EntryFlag::Exact }; - self.store(hash, best, depth, flag, best_move); + self.concurrent_store(hash, best, depth, flag, best_move); } } From d152dabbf900ed3507e60db4c5aeba54ca20d625 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 8 Mar 2021 22:28:34 -0800 Subject: [PATCH 051/162] Dedup populate_pv into Table trait. --- src/strategies/iterative.rs | 36 +---------------------------------- src/strategies/table.rs | 34 +++++++++++++++++++++++++++++++++ src/strategies/ybw.rs | 38 ++----------------------------------- 3 files changed, 37 insertions(+), 71 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index eb13dd5..1b68a69 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -263,40 +263,6 @@ where unclamp_value(self.prev_value) } - // After finishing a search, populate the principal variation as deep as - // the table remembers it. 
- fn populate_pv(&mut self, s: &mut ::S, mut depth: u8) - where - ::S: Zobrist, - ::M: Copy, - { - self.pv.clear(); - let mut hash = s.zobrist_hash(); - while let Some(entry) = self.transposition_table.lookup(hash) { - // The principal variation should only have exact nodes, as other - // node types are from cutoffs where the node is proven to be - // worse than a previously explored one. - // - // Sometimes, it takes multiple rounds of narrowing bounds for the - // value to be exact, and we can't guarantee that the table entry - // will remain in the table between the searches that find - // equivalent upper and lower bounds. - let m = entry.best_move.unwrap(); - self.pv.push(m); - m.apply(s); - hash = s.zobrist_hash(); - // Prevent cyclical PVs from being infinitely long. - if depth == 0 { - break; - } - depth -= 1; - } - // Restore state. - for m in self.pv.iter().rev() { - m.undo(s); - } - } - /// Return what the engine considered to be the best sequence of moves /// from both sides. pub fn principal_variation(&self) -> &[::M] { @@ -472,7 +438,7 @@ where self.prev_value = entry.value; self.next_depth_nodes = 0; depth += self.opts.step_increment; - self.populate_pv(&mut s_clone, depth + 1); + self.transposition_table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); } self.wall_time = start_time.elapsed(); best_move diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 97e00ee..d504dfc 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -82,6 +82,40 @@ pub(super) trait Table { }; self.store(hash, best, depth, flag, best_move); } + + // After finishing a search, populate the principal variation as deep as + // the table remembers it. 
+ fn populate_pv(&self, pv: &mut Vec, s: &mut G::S, mut depth: u8) + where + M: Move, + ::S: Zobrist, + { + pv.clear(); + let mut hash = s.zobrist_hash(); + while let Some(entry) = self.lookup(hash) { + // The principal variation should only have exact nodes, as other + // node types are from cutoffs where the node is proven to be + // worse than a previously explored one. + // + // Sometimes, it takes multiple rounds of narrowing bounds for the + // value to be exact, and we can't guarantee that the table entry + // will remain in the table between the searches that find + // equivalent upper and lower bounds. + let m = entry.best_move.unwrap(); + pv.push(m); + m.apply(s); + hash = s.zobrist_hash(); + // Prevent cyclical PVs from being infinitely long. + if depth == 0 { + break; + } + depth -= 1; + } + // Restore state. + for m in pv.iter().rev() { + m.undo(s); + } + } } // It would be nice to unify most of the implementation of the single-threaded diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index c1b52d3..a2f70bd 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -161,40 +161,6 @@ impl ParallelYbw { unclamp_value(self.prev_value) } - // After finishing a search, populate the principal variation as deep as - // the table remembers it. - fn populate_pv(&mut self, s: &mut ::S, mut depth: u8) - where - ::S: Zobrist, - ::M: Copy, - { - self.pv.clear(); - let mut hash = s.zobrist_hash(); - while let Some(entry) = self.table.lookup(hash) { - // The principal variation should only have exact nodes, as other - // node types are from cutoffs where the node is proven to be - // worse than a previously explored one. - // - // Sometimes, it takes multiple rounds of narrowing bounds for the - // value to be exact, and we can't guarantee that the table entry - // will remain in the table between the searches that find - // equivalent upper and lower bounds. 
- let m = entry.best_move.unwrap(); - self.pv.push(m); - m.apply(s); - hash = s.zobrist_hash(); - // Prevent cyclical PVs from being infinitely long. - if depth == 0 { - break; - } - depth -= 1; - } - // Restore state. - for m in self.pv.iter().rev() { - m.undo(s); - } - } - /// Return what the engine considered to be the best sequence of moves /// from both sides. pub fn principal_variation(&self) -> &[::M] { @@ -374,7 +340,7 @@ impl ParallelYbw { best_move.into_inner().unwrap().into_inner() }; - self.table.update(hash, alpha_orig, beta, depth, best, best_move); + self.table.concurrent_update(hash, alpha_orig, beta, depth, best, best_move); //self.move_pool.free(moves); Some(clamp_value(best)) } @@ -421,7 +387,7 @@ where self.prev_value = entry.value; self.next_depth_nodes = 0; depth += self.opts.step_increment; - self.populate_pv(&mut s_clone, depth + 1); + self.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); } self.wall_time = start_time.elapsed(); best_move From 1a5dae3daea0499a7e8090995a7dc3d59ab12b7c Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 8 Mar 2021 23:05:19 -0800 Subject: [PATCH 052/162] Refactor table-based negamax into its own type. I'll probably move it into its own file at some point. 
--- src/strategies/iterative.rs | 237 ++++++++++++++++++++---------------- src/strategies/table.rs | 9 +- 2 files changed, 137 insertions(+), 109 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 1b68a69..12438ab 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -49,10 +49,6 @@ impl TranspositionTable { } Self { table, mask, generation: 0, strategy } } - - fn advance_generation(&mut self) { - self.generation = self.generation.wrapping_add(1); - } } impl Table for TranspositionTable { @@ -107,6 +103,10 @@ impl Table for TranspositionTable { } } } + + fn advance_generation(&mut self) { + self.generation = self.generation.wrapping_add(1); + } } /// Options to use for the iterative search engine. @@ -175,107 +175,30 @@ impl IterativeOptions { } } -pub struct IterativeSearch { - max_depth: usize, - max_time: Duration, +pub(super) struct Negamaxer { timeout: Arc, - transposition_table: TranspositionTable<<::G as Game>::M>, + table: T, move_pool: MovePool<::M>, - prev_value: Evaluation, eval: E, - opts: IterativeOptions, - - // Runtime stats for the last move generated. + // Config + max_quiescence_depth: u8, + null_window_search: bool, - // Maximum depth used to produce the move. - actual_depth: u8, - // Nodes explored at each depth. - nodes_explored: Vec, - // Nodes explored past this depth, and thus only useful for filling TT for - // next choose_move. - next_depth_nodes: u64, - // For computing the average branching factor. 
+ // Stats + nodes_explored: u64, total_generate_move_calls: u64, total_generated_moves: u64, - table_hits: usize, - pv: Vec<::M>, - wall_time: Duration, } -impl IterativeSearch +impl::M>> Negamaxer where ::M: Copy, { - pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { - let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); - IterativeSearch { - max_depth: 100, - max_time: Duration::from_secs(5), - timeout: Arc::new(AtomicBool::new(false)), - transposition_table: table, - move_pool: MovePool::<_>::default(), - prev_value: 0, - opts, - eval, - actual_depth: 0, - nodes_explored: Vec::new(), - next_depth_nodes: 0, - total_generate_move_calls: 0, - total_generated_moves: 0, - table_hits: 0, - pv: Vec::new(), - wall_time: Duration::default(), - } - } - - /// Set the maximum depth to search. Disables the timeout. - /// This can be changed between moves while reusing the transposition table. - pub fn set_max_depth(&mut self, depth: usize) { - self.max_depth = depth; - self.max_time = Duration::new(0, 0); - } - - /// Set the maximum time to compute the best move. When the timeout is - /// hit, it returns the best move found of the previous full - /// iteration. Unlimited max depth. - pub fn set_timeout(&mut self, max_time: Duration) { - self.max_time = max_time; - self.max_depth = 100; - } - - /// Return a human-readable summary of the last move generation. - pub fn stats(&self) -> String { - let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); - let mean_branching_factor = - self.total_generated_moves as f64 / self.total_generate_move_calls as f64; - let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) - .powf((self.actual_depth as f64 + 1.0).recip()); - let throughput = - (total_nodes_explored + self.next_depth_nodes) as f64 / self.wall_time.as_secs_f64(); - format!("Explored {} nodes to depth {}. 
MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} transposition table hits.\n{} nodes/sec", - total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, - self.next_depth_nodes, self.table_hits, throughput as usize) - } - - #[doc(hidden)] - pub fn root_value(&self) -> Evaluation { - unclamp_value(self.prev_value) - } - - /// Return what the engine considered to be the best sequence of moves - /// from both sides. - pub fn principal_variation(&self) -> &[::M] { - &self.pv[..] - } - // Negamax only among noisy moves. fn noisy_negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, - ) -> Option - where - ::M: Copy, - { + ) -> Option { if self.timeout.load(Ordering::Relaxed) { return None; } @@ -316,18 +239,18 @@ where ) -> Option where ::S: Zobrist, - ::M: Copy + Eq, + ::M: Eq, { if self.timeout.load(Ordering::Relaxed) { return None; } - self.next_depth_nodes += 1; + self.nodes_explored += 1; if depth == 0 { // Evaluate quiescence search on leaf nodes. // Will just return the node's evaluation if quiescence search is disabled. - return self.noisy_negamax(s, self.opts.max_quiescence_depth, alpha, beta); + return self.noisy_negamax(s, self.max_quiescence_depth, alpha, beta); } if let Some(winner) = E::G::get_winner(s) { return Some(winner.evaluate()); @@ -336,9 +259,7 @@ where let alpha_orig = alpha; let hash = s.zobrist_hash(); let mut good_move = None; - if let Some(value) = - self.transposition_table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) - { + if let Some(value) = self.table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) { return Some(value); } @@ -385,36 +306,136 @@ where alpha = value; // Now that we've found a good move, assume following moves // are worse, and seek to cull them without full evaluation. 
- null_window = self.opts.null_window_search; + null_window = self.null_window_search; } if alpha >= beta { break; } } - self.transposition_table.update(hash, alpha_orig, beta, depth, best, best_move); + self.table.update(hash, alpha_orig, beta, depth, best, best_move); self.move_pool.free(moves); Some(clamp_value(best)) } } +pub struct IterativeSearch { + max_depth: usize, + max_time: Duration, + //timeout: Arc, + negamaxer: Negamaxer::M>>, + //transposition_table: TranspositionTable<<::G as Game>::M>, + //move_pool: MovePool<::M>, + prev_value: Evaluation, + //eval: E, + opts: IterativeOptions, + + // Runtime stats for the last move generated. + + // Maximum depth used to produce the move. + actual_depth: u8, + // Nodes explored at each depth. + nodes_explored: Vec, + // Nodes explored past this depth, and thus only useful for filling TT for + // next choose_move. + //next_depth_nodes: u64, + // For computing the average branching factor. + //total_generate_move_calls: u64, + //total_generated_moves: u64, + table_hits: usize, + pv: Vec<::M>, + wall_time: Duration, +} + +impl IterativeSearch +where + ::M: Copy, +{ + pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { + let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); + let negamaxer = Negamaxer { + timeout: Arc::new(AtomicBool::new(false)), + table, + move_pool: MovePool::<_>::default(), + eval, + max_quiescence_depth: opts.max_quiescence_depth, + null_window_search: opts.null_window_search, + nodes_explored: 0, + total_generate_move_calls: 0, + total_generated_moves: 0, + }; + IterativeSearch { + max_depth: 100, + max_time: Duration::from_secs(5), + prev_value: 0, + negamaxer, + opts, + actual_depth: 0, + nodes_explored: Vec::new(), + table_hits: 0, + pv: Vec::new(), + wall_time: Duration::default(), + } + } + + /// Set the maximum depth to search. Disables the timeout. + /// This can be changed between moves while reusing the transposition table. 
+ pub fn set_max_depth(&mut self, depth: usize) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + /// Set the maximum time to compute the best move. When the timeout is + /// hit, it returns the best move found of the previous full + /// iteration. Unlimited max depth. + pub fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 100; + } + + /// Return a human-readable summary of the last move generation. + pub fn stats(&self) -> String { + let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); + let mean_branching_factor = self.negamaxer.total_generated_moves as f64 + / self.negamaxer.total_generate_move_calls as f64; + let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) + .powf((self.actual_depth as f64 + 1.0).recip()); + let throughput = (total_nodes_explored + self.negamaxer.nodes_explored) as f64 + / self.wall_time.as_secs_f64(); + format!("Explored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} transposition table hits.\n{} nodes/sec", + total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, + self.negamaxer.nodes_explored, self.table_hits, throughput as usize) + } + + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + /// Return what the engine considered to be the best sequence of moves + /// from both sides. + pub fn principal_variation(&self) -> &[::M] { + &self.pv[..] + } +} + impl Strategy for IterativeSearch where ::S: Clone + Zobrist, ::M: Copy + Eq, { fn choose_move(&mut self, s: &::S) -> Option<::M> { - self.transposition_table.advance_generation(); + self.negamaxer.table.advance_generation(); // Reset stats. 
self.nodes_explored.clear(); - self.next_depth_nodes = 0; - self.total_generate_move_calls = 0; - self.total_generated_moves = 0; + self.negamaxer.nodes_explored = 0; + self.negamaxer.total_generate_move_calls = 0; + self.negamaxer.total_generated_moves = 0; self.actual_depth = 0; self.table_hits = 0; let start_time = Instant::now(); // Start timer if configured. - self.timeout = if self.max_time == Duration::new(0, 0) { + self.negamaxer.timeout = if self.max_time == Duration::new(0, 0) { Arc::new(AtomicBool::new(false)) } else { timeout_signal(self.max_time) @@ -426,19 +447,19 @@ where let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { - if self.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { + if self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. break; } - let entry = self.transposition_table.lookup(root_hash).unwrap(); + let entry = self.negamaxer.table.lookup(root_hash).unwrap(); best_move = entry.best_move; self.actual_depth = max(self.actual_depth, depth); - self.nodes_explored.push(self.next_depth_nodes); + self.nodes_explored.push(self.negamaxer.nodes_explored); + self.negamaxer.nodes_explored = 0; self.prev_value = entry.value; - self.next_depth_nodes = 0; depth += self.opts.step_increment; - self.transposition_table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); + self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); } self.wall_time = start_time.elapsed(); best_move diff --git a/src/strategies/table.rs b/src/strategies/table.rs index d504dfc..93835a2 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -38,6 +38,7 @@ fn test_entry_size() { pub(super) trait Table { fn lookup(&self, hash: u64) -> Option>; fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M); + fn advance_generation(&mut self); // Check and update 
negamax state based on any transposition table hit. // Returns Some(value) on an exact match. @@ -146,7 +147,7 @@ impl ConcurrentTable { Self { table, mask, generation: AtomicU8::new(0) } } - pub(super) fn advance_generation(&self) { + pub(super) fn concurrent_advance_generation(&self) { self.generation.fetch_add(1, Ordering::SeqCst); } } @@ -158,6 +159,9 @@ impl Table for ConcurrentTable { fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { self.concurrent_store(hash, value, depth, flag, best_move) } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } } impl Table for Arc> { @@ -167,6 +171,9 @@ impl Table for Arc> { fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { self.concurrent_store(hash, value, depth, flag, best_move) } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } } impl ConcurrentTable From 8feb462c9660deb74612d8a1268e24681f4f4c75 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 10 Mar 2021 11:08:02 -0800 Subject: [PATCH 053/162] Add aspiration window option in IterativeSearch. 
--- benches/negamax.rs | 4 ++-- examples/connect4.rs | 7 ++++-- src/strategies/iterative.rs | 47 ++++++++++++++++++++++++------------- tests/strategies.rs | 6 +++-- 4 files changed, 42 insertions(+), 22 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index c9ea242..67a5d99 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -21,7 +21,7 @@ fn bench_iterative(b: &mut Bencher) { b.iter(|| { let mut s = IterativeSearch::new( connect4::BasicEvaluator::default(), - IterativeOptions::new().with_table_byte_size(32_000).with_null_window_search(true), + IterativeOptions::new().with_table_byte_size(32_000), ); s.set_max_depth(5); let m = s.choose_move(&board); @@ -34,7 +34,7 @@ fn bench_parallel(b: &mut Bencher) { b.iter(|| { let mut s = ParallelYbw::new( connect4::BasicEvaluator::default(), - YbwOptions::new().with_table_byte_size(32_000).with_null_window_search(true), + YbwOptions::new().with_table_byte_size(32_000), ); s.set_max_depth(5); let m = s.choose_move(&board); diff --git a/examples/connect4.rs b/examples/connect4.rs index 26fb802..0eb2d5a 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -266,12 +266,15 @@ fn main() { ); dumb.set_max_depth(8); - let opts = IterativeOptions::new().with_table_byte_size(16_000_000); + let opts = IterativeOptions::new() + .with_table_byte_size(64_000_000) + .with_double_step_increment() + .with_aspiration_window(5); let mut iterative = IterativeSearch::new(BasicEvaluator::default(), opts); iterative.set_max_depth(12); let mut parallel = ParallelYbw::new( BasicEvaluator::default(), - YbwOptions::new().with_table_byte_size(16_000_000), + YbwOptions::new().with_table_byte_size(64_000_000).with_double_step_increment(), ); parallel.set_max_depth(12); let mut strategies: [&mut dyn Strategy; 3] = diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 12438ab..4d3a434 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -115,6 +115,7 @@ pub struct 
IterativeOptions { table_byte_size: usize, strategy: Replacement, null_window_search: bool, + aspiration_window: Option, step_increment: u8, max_quiescence_depth: u8, } @@ -125,6 +126,7 @@ impl IterativeOptions { table_byte_size: 1_000_000, strategy: Replacement::TwoTier, null_window_search: true, + aspiration_window: None, step_increment: 1, max_quiescence_depth: 0, } @@ -159,6 +161,13 @@ impl IterativeOptions { self } + /// Whether to search first in a narrow window around the previous root + /// value on each iteration. + pub fn with_aspiration_window(mut self, window: Evaluation) -> Self { + self.aspiration_window = Some(window); + self + } + /// Increment the depth by two between iterations. pub fn with_double_step_increment(mut self) -> Self { self.step_increment = 2; @@ -193,7 +202,8 @@ pub(super) struct Negamaxer { impl::M>> Negamaxer where - ::M: Copy, + ::S: Zobrist, + ::M: Copy + Eq, { // Negamax only among noisy moves. fn noisy_negamax( @@ -236,11 +246,7 @@ where fn negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, mut beta: Evaluation, - ) -> Option - where - ::S: Zobrist, - ::M: Eq, - { + ) -> Option { if self.timeout.load(Ordering::Relaxed) { return None; } @@ -317,17 +323,28 @@ where self.move_pool.free(moves); Some(clamp_value(best)) } + + // Try to find the value within a window around the estimated value. + // Results, whether exact, overshoot, or undershoot, are stored in the table. + fn aspiration_search( + &mut self, s: &mut ::S, depth: u8, target: Evaluation, window: Evaluation, + ) -> Option<()> { + if depth < 2 { + // Do a full search on shallow nodes to establish the target. 
+ return Some(()); + } + let alpha = max(target.saturating_sub(window), WORST_EVAL); + let beta = target.saturating_add(window); + self.negamax(s, depth, alpha, beta)?; + Some(()) + } } pub struct IterativeSearch { max_depth: usize, max_time: Duration, - //timeout: Arc, negamaxer: Negamaxer::M>>, - //transposition_table: TranspositionTable<<::G as Game>::M>, - //move_pool: MovePool<::M>, prev_value: Evaluation, - //eval: E, opts: IterativeOptions, // Runtime stats for the last move generated. @@ -336,12 +353,6 @@ pub struct IterativeSearch { actual_depth: u8, // Nodes explored at each depth. nodes_explored: Vec, - // Nodes explored past this depth, and thus only useful for filling TT for - // next choose_move. - //next_depth_nodes: u64, - // For computing the average branching factor. - //total_generate_move_calls: u64, - //total_generated_moves: u64, table_hits: usize, pv: Vec<::M>, wall_time: Duration, @@ -447,6 +458,10 @@ where let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { + if let Some(window) = self.opts.aspiration_window { + // Results of the search are stored in the table. + self.negamaxer.aspiration_search(&mut s_clone, depth + 1, self.prev_value, window); + } if self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. 
break; diff --git a/tests/strategies.rs b/tests/strategies.rs index 1ff4501..a30a2ce 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -143,11 +143,13 @@ fn compare_plain_negamax() { for (option_num, opt) in vec![ IterativeOptions::new() .with_replacement_strategy(Replacement::DepthPreferred) - .with_null_window_search(true), + .with_null_window_search(false), IterativeOptions::new() .with_replacement_strategy(Replacement::Always) .with_double_step_increment(), - IterativeOptions::new().with_replacement_strategy(Replacement::TwoTier), + IterativeOptions::new() + .with_replacement_strategy(Replacement::TwoTier) + .with_aspiration_window(5), ] .drain(..) .enumerate() From 21e0e16739f86ccfc33e314a6fcfdb52a0c516f4 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 12 Mar 2021 22:37:07 -0800 Subject: [PATCH 054/162] Add another parallel strategy based on LazySMP. Uses way less parallelism in my benchmark, but it's just as slowed down by the entry mutex in ConcurrentTable. 
--- Cargo.toml | 3 +- examples/connect4.rs | 20 +- src/lib.rs | 1 + src/strategies/iterative.rs | 56 ++++-- src/strategies/lazy_smp.rs | 351 ++++++++++++++++++++++++++++++++++++ src/strategies/mod.rs | 1 + src/strategies/negamax.rs | 6 +- src/strategies/random.rs | 10 +- tests/strategies.rs | 44 ++++- 9 files changed, 446 insertions(+), 46 deletions(-) create mode 100644 src/strategies/lazy_smp.rs diff --git a/Cargo.toml b/Cargo.toml index 37c125e..b9533c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,8 +11,9 @@ keywords = ["ai", "game", "minimax", "negamax"] license = "MIT" [dependencies] +num_cpus = "1.0" parking_lot = "^0.11" -rand = "0.4.*" +rand = "0.8" rayon = "^1.5" [dev-dependencies] diff --git a/examples/connect4.rs b/examples/connect4.rs index 0eb2d5a..918caf0 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -195,6 +195,7 @@ impl Board { } } +#[derive(Clone)] pub struct BasicEvaluator; impl Default for BasicEvaluator { @@ -250,8 +251,7 @@ impl minimax::Evaluator for BasicEvaluator { } fn main() { - use minimax::{perft, Game, Move, Strategy}; - use minimax::{IterativeOptions, IterativeSearch, ParallelYbw, YbwOptions}; + use minimax::*; let mut b = Board::default(); @@ -272,16 +272,24 @@ fn main() { .with_aspiration_window(5); let mut iterative = IterativeSearch::new(BasicEvaluator::default(), opts); iterative.set_max_depth(12); - let mut parallel = ParallelYbw::new( + let mut parallelybw = ParallelYbw::new( BasicEvaluator::default(), YbwOptions::new().with_table_byte_size(64_000_000).with_double_step_increment(), ); - parallel.set_max_depth(12); - let mut strategies: [&mut dyn Strategy; 3] = - [&mut dumb, &mut iterative, &mut parallel]; + parallelybw.set_max_depth(12); + let mut lazysmp = LazySmp::new( + BasicEvaluator::default(), + LazySmpOptions::new().with_table_byte_size(64_000_000).with_double_step_increment(), + ); + lazysmp.set_max_depth(12); + + let mut strategies: [&mut dyn Strategy; 4] = + [&mut dumb, &mut iterative, &mut 
parallelybw, &mut lazysmp]; if std::env::args().any(|arg| arg == "parallel") { strategies.swap(1, 2); + } else if std::env::args().any(|arg| arg == "lazysmp") { + strategies.swap(1, 3); } let mut s = 0; diff --git a/src/lib.rs b/src/lib.rs index 71e33a1..7392328 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ pub use interface::{ Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist, BEST_EVAL, WORST_EVAL, }; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; +pub use strategies::lazy_smp::{LazySmp, LazySmpOptions}; pub use strategies::negamax::Negamax; pub use strategies::ybw::{ParallelYbw, YbwOptions}; pub use util::perft; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 4d3a434..ed5b613 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -186,7 +186,7 @@ impl IterativeOptions { pub(super) struct Negamaxer { timeout: Arc, - table: T, + pub(super) table: T, move_pool: MovePool<::M>, eval: E, @@ -205,6 +205,32 @@ where ::S: Zobrist, ::M: Copy + Eq, { + pub(super) fn new( + table: T, eval: E, max_quiescence_depth: u8, null_window_search: bool, + ) -> Self { + Self { + timeout: Arc::new(AtomicBool::new(false)), + table, + eval, + move_pool: MovePool::default(), + max_quiescence_depth, + null_window_search, + nodes_explored: 0, + total_generate_move_calls: 0, + total_generated_moves: 0, + } + } + + pub(super) fn set_timeout(&mut self, timeout: Arc) { + self.timeout = timeout; + } + + fn reset_stats(&mut self) { + self.nodes_explored = 0; + self.total_generate_move_calls = 0; + self.total_generated_moves = 0; + } + // Negamax only among noisy moves. fn noisy_negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, @@ -243,7 +269,7 @@ where } // Recursively compute negamax on the game state. Returns None if it hits the timeout. 
- fn negamax( + pub(super) fn negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, mut beta: Evaluation, ) -> Option { @@ -326,7 +352,7 @@ where // Try to find the value within a window around the estimated value. // Results, whether exact, overshoot, or undershoot, are stored in the table. - fn aspiration_search( + pub(super) fn aspiration_search( &mut self, s: &mut ::S, depth: u8, target: Evaluation, window: Evaluation, ) -> Option<()> { if depth < 2 { @@ -360,21 +386,13 @@ pub struct IterativeSearch { impl IterativeSearch where - ::M: Copy, + ::M: Copy + Eq, + ::S: Clone + Zobrist, { pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); - let negamaxer = Negamaxer { - timeout: Arc::new(AtomicBool::new(false)), - table, - move_pool: MovePool::<_>::default(), - eval, - max_quiescence_depth: opts.max_quiescence_depth, - null_window_search: opts.null_window_search, - nodes_explored: 0, - total_generate_move_calls: 0, - total_generated_moves: 0, - }; + let negamaxer = + Negamaxer::new(table, eval, opts.max_quiescence_depth, opts.null_window_search); IterativeSearch { max_depth: 100, max_time: Duration::from_secs(5), @@ -439,18 +457,16 @@ where self.negamaxer.table.advance_generation(); // Reset stats. self.nodes_explored.clear(); - self.negamaxer.nodes_explored = 0; - self.negamaxer.total_generate_move_calls = 0; - self.negamaxer.total_generated_moves = 0; + self.negamaxer.reset_stats(); self.actual_depth = 0; self.table_hits = 0; let start_time = Instant::now(); // Start timer if configured. 
- self.negamaxer.timeout = if self.max_time == Duration::new(0, 0) { + self.negamaxer.set_timeout(if self.max_time == Duration::new(0, 0) { Arc::new(AtomicBool::new(false)) } else { timeout_signal(self.max_time) - }; + }); let root_hash = s.zobrist_hash(); let mut s_clone = s.clone(); diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs new file mode 100644 index 0000000..42daf48 --- /dev/null +++ b/src/strategies/lazy_smp.rs @@ -0,0 +1,351 @@ +//! An implementation of parallelized Negamax via the Lazy Symmetric +//! MultiProcessing algorithm. +//! +//! This parallel algorithm minimizes cross-thread synchronization and +//! minimizes game state cloning, at the expense of doing more duplicative +//! work across different threads. + +extern crate num_cpus; +extern crate rand; + +use super::super::interface::*; +use super::iterative::Negamaxer; +use super::table::*; +use super::util::*; + +use rand::seq::SliceRandom; +use std::cmp::max; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Condvar, Mutex}; +use std::thread::spawn; +use std::time::{Duration, Instant}; + +/// Options to use for the iterative search engine. +#[derive(Clone, Copy)] +pub struct LazySmpOptions { + table_byte_size: usize, + step_increment: u8, + max_quiescence_depth: u8, + aspiration_window: Option, + // Default is one per core. + num_threads: Option, + // TODO: optional bonus thread local TT? + // TODO: min_TT_depth? + // TODO: alternating depths in alternating threads +} + +impl LazySmpOptions { + pub fn new() -> Self { + LazySmpOptions { + table_byte_size: 32_000_000, + step_increment: 1, + max_quiescence_depth: 0, + aspiration_window: None, + num_threads: None, + } + } +} + +impl Default for LazySmpOptions { + fn default() -> Self { + Self::new() + } +} + +impl LazySmpOptions { + /// Approximately how large the transposition table should be in memory. 
+ pub fn with_table_byte_size(mut self, size: usize) -> Self { + self.table_byte_size = size; + self + } + + /// Increment the depth by two between iterations. + pub fn with_double_step_increment(mut self) -> Self { + self.step_increment = 2; + self + } + + /// Enable [quiescence + /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves + /// of the search tree. The Game must implement `generate_noisy_moves` + /// for the search to know when the state has become "quiet". + pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { + self.max_quiescence_depth = depth; + self + } + + pub fn with_num_threads(mut self, num_threads: usize) -> Self { + self.num_threads = Some(num_threads); + self + } + + /// Whether to search first in a narrow window around the previous root + /// value on each iteration. + pub fn with_aspiration_window(mut self, window: Evaluation) -> Self { + self.aspiration_window = Some(window); + self + } +} + +#[derive(Clone)] +struct Search { + state: S, + depth: u8, + timeout: Arc, +} + +// A directive to the helper threads. +enum Command { + Wait, + Exit, + Search(Search), +} + +struct Helper +where + ::S: Clone, + ::M: Copy + Eq, +{ + negamaxer: Negamaxer::M>>>, + command: Arc::S>>>, + waiter: Arc, +} + +impl Helper +where + ::S: Clone + Zobrist, + ::M: Copy + Eq, +{ + fn process(&mut self) { + loop { + let mut search = { + let command = self.command.lock().unwrap(); + let command = + self.waiter.wait_while(command, |c| matches!(*c, Command::Wait)).unwrap(); + match *command { + Command::Exit => return, + Command::Wait => continue, + Command::Search(ref search) => search.clone(), + } + }; + + self.negamaxer.set_timeout(search.timeout.clone()); + let mut alpha = WORST_EVAL; + let mut beta = BEST_EVAL; + self.negamaxer.table.check( + search.state.zobrist_hash(), + search.depth, + &mut None, + &mut alpha, + &mut beta, + ); + + // Randomize the first level of moves. 
+ let mut moves = Vec::new(); + E::G::generate_moves(&search.state, &mut moves); + moves.shuffle(&mut rand::thread_rng()); + // Negamax search the rest. + for m in moves { + m.apply(&mut search.state); + if let Some(value) = + self.negamaxer.negamax(&mut search.state, search.depth, alpha, beta) + { + alpha = max(alpha, -value); + } else { + break; + } + if alpha >= beta { + break; + } + m.undo(&mut search.state); + } + + // Computation finished or interrupted, go back to sleep. + } + } +} + +pub struct LazySmp +where + ::S: Clone + Zobrist, + ::M: Copy + Eq, +{ + max_depth: usize, + max_time: Duration, + table: Arc::M>>, + negamaxer: Negamaxer::M>>>, + command: Arc::S>>>, + signal: Arc, + + opts: LazySmpOptions, + + // Runtime stats for the last move generated. + prev_value: Evaluation, + // Maximum depth used to produce the move. + actual_depth: u8, + // Nodes explored at each depth. + nodes_explored: Vec, + pv: Vec<::M>, + wall_time: Duration, +} + +impl Drop for LazySmp +where + ::S: Clone + Zobrist, + ::M: Copy + Eq, +{ + fn drop(&mut self) { + *self.command.lock().unwrap() = Command::Exit; + self.signal.notify_all(); + } +} + +impl LazySmp +where + ::S: Clone + Zobrist + Send, + ::M: Copy + Eq + Send, + E: Clone + Send, +{ + pub fn new(eval: E, opts: LazySmpOptions) -> LazySmp + where + E: 'static, + { + let table = Arc::new(ConcurrentTable::new(opts.table_byte_size)); + let command = Arc::new(Mutex::new(Command::Wait)); + let signal = Arc::new(Condvar::new()); + // start n-1 helper threads + for _ in 1..opts.num_threads.unwrap_or_else(num_cpus::get) { + let table2 = table.clone(); + let eval2 = eval.clone(); + let command2 = command.clone(); + let waiter = signal.clone(); + spawn(move || { + let mut helper = Helper { + negamaxer: Negamaxer::new(table2, eval2, opts.max_quiescence_depth, true), + command: command2, + waiter, + }; + helper.process(); + }); + } + let negamaxer = Negamaxer::new(table.clone(), eval, opts.max_quiescence_depth, true); + LazySmp { + 
max_depth: 100, + max_time: Duration::from_secs(5), + table, + negamaxer, + command, + signal, + prev_value: 0, + opts, + actual_depth: 0, + nodes_explored: Vec::new(), + pv: Vec::new(), + wall_time: Duration::default(), + } + } + + /// Set the maximum depth to search. Disables the timeout. + /// This can be changed between moves while reusing the transposition table. + pub fn set_max_depth(&mut self, depth: usize) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + /// Set the maximum time to compute the best move. When the timeout is + /// hit, it returns the best move found of the previous full + /// iteration. Unlimited max depth. + pub fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 100; + } + + // TODO: gather stats from helper threads. + // Return a human-readable summary of the last move generation. + //pub fn stats(&self) -> String { + //} + + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + /// Return what the engine considered to be the best sequence of moves + /// from both sides. + pub fn principal_variation(&self) -> &[::M] { + &self.pv[..] + } +} + +impl Strategy for LazySmp +where + ::S: Clone + Zobrist, + ::M: Copy + Eq, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + self.table.concurrent_advance_generation(); + // Reset stats. + self.nodes_explored.clear(); + self.actual_depth = 0; + let start_time = Instant::now(); + // Start timer if configured. + self.negamaxer.set_timeout(if self.max_time == Duration::new(0, 0) { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(self.max_time) + }); + + let root_hash = s.zobrist_hash(); + let mut s_clone = s.clone(); + let mut best_move = None; + + let mut depth = self.max_depth as u8 % self.opts.step_increment; + while depth <= self.max_depth as u8 { + // First, a serial aspiration search to at least establish some bounds. 
+ if self + .negamaxer + .aspiration_search( + &mut s_clone, + depth + 1, + self.prev_value, + self.opts.aspiration_window.unwrap_or(2), + ) + .is_none() + { + // Timeout. + break; + } + + let iteration_done = Arc::new(AtomicBool::new(false)); + { + let mut command = self.command.lock().unwrap(); + *command = Command::Search(Search { + state: s.clone(), + depth, + timeout: iteration_done.clone(), + }); + self.signal.notify_all(); + } + + let value = self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL); + { + *self.command.lock().unwrap() = Command::Wait; + } + iteration_done.store(true, Ordering::Relaxed); + if value.is_none() { + // Timeout. Return the best move from the previous depth. + break; + } + + let entry = self.table.lookup(root_hash).unwrap(); + best_move = entry.best_move; + + self.actual_depth = max(self.actual_depth, depth); + self.prev_value = entry.value; + depth += self.opts.step_increment; + self.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); + } + self.wall_time = start_time.elapsed(); + best_move + } +} diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 1505756..2b8664e 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -1,6 +1,7 @@ //! Strategy implementations. pub mod iterative; +pub mod lazy_smp; pub mod negamax; pub mod random; pub mod ybw; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 645d013..670e758 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -6,13 +6,13 @@ use super::super::interface::*; use super::super::util::*; use super::util::*; -use rand::Rng; +use rand::seq::SliceRandom; use std::cmp::max; pub struct Negamax { max_depth: usize, move_pool: MovePool<::M>, - rng: rand::ThreadRng, + rng: rand::rngs::ThreadRng, prev_value: Evaluation, eval: E, } @@ -74,7 +74,7 @@ where E::G::generate_moves(s, &mut moves); // Randomly permute order that we look at the moves. // We'll pick the first best score from this list. 
- self.rng.shuffle(&mut moves[..]); + moves.shuffle(&mut self.rng); let mut best_move = *moves.first()?; let mut s_clone = s.clone(); diff --git a/src/strategies/random.rs b/src/strategies/random.rs index ed831d2..bdb782a 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -1,10 +1,10 @@ //! A strategy that randomly chooses a move, for use in tests. use super::super::interface::*; -use rand::Rng; +use rand::seq::SliceRandom; pub struct Random { - rng: rand::ThreadRng, + rng: rand::rngs::ThreadRng, } impl Random { @@ -26,10 +26,6 @@ where fn choose_move(&mut self, s: &G::S) -> Option { let mut moves = Vec::new(); G::generate_moves(s, &mut moves); - if moves.is_empty() { - None - } else { - Some(moves[self.rng.gen_range(0, moves.len())]) - } + moves.choose(&mut self.rng).copied() } } diff --git a/tests/strategies.rs b/tests/strategies.rs index a30a2ce..88d998b 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -10,7 +10,7 @@ extern crate rand; mod connect4; use minimax::*; -use rand::Rng; +use rand::seq::SliceRandom; use std::cmp::max; use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; @@ -80,6 +80,7 @@ where } } +#[derive(Clone)] struct RandomEvaluator; impl Default for RandomEvaluator { @@ -107,7 +108,7 @@ fn generate_random_state(depth: usize) -> connect4::Board { for _ in 0..depth { let mut moves = Vec::new(); connect4::Game::generate_moves(&b, &mut moves); - let m = moves[rng.gen_range(0, moves.len())]; + let m = moves.choose(&mut rng).unwrap(); m.apply(&mut b); if connect4::Game::get_winner(&b).is_some() { // Oops, undo and try again on the next iter. 
@@ -175,18 +176,34 @@ fn compare_plain_negamax() { ); } - let mut parallel = ParallelYbw::new( + let mut ybw = ParallelYbw::new( RandomEvaluator::default(), YbwOptions::default().with_table_byte_size(64000), ); - parallel.set_max_depth(max_depth); - let parallel_move = parallel.choose_move(&b).unwrap(); - let parallel_value = parallel.root_value(); - assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); + ybw.set_max_depth(max_depth); + let ybw_move = ybw.choose_move(&b).unwrap(); + let ybw_value = ybw.root_value(); + assert_eq!(value, ybw_value, "search depth={}\n{}", max_depth, b); assert!( - plain_negamax.best_moves.contains(&parallel_move), + plain_negamax.best_moves.contains(&ybw_move), "bad move={:?}\nsearch depth={}\n{}", - parallel_move, + ybw_move, max_depth, b ); + + let mut lazysmp = LazySmp::new( + RandomEvaluator::default(), + LazySmpOptions::default().with_table_byte_size(64000), + ); + lazysmp.set_max_depth(max_depth); + let lazysmp_move = lazysmp.choose_move(&b).unwrap(); + let lazysmp_value = lazysmp.root_value(); + assert_eq!(value, lazysmp_value, "search depth={}\n{}", max_depth, b); + assert!( + plain_negamax.best_moves.contains(&lazysmp_move), + "bad move={:?}\nsearch depth={}\n{}", + lazysmp_move, max_depth, b ); @@ -223,6 +240,15 @@ fn compare_deep_negamax() { parallel.choose_move(&b).unwrap(); let parallel_value = parallel.root_value(); assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); + + let mut lazysmp = LazySmp::new( + RandomEvaluator::default(), + LazySmpOptions::default().with_table_byte_size(64000), + ); + lazysmp.set_max_depth(max_depth); + lazysmp.choose_move(&b).unwrap(); + let lazysmp_value = lazysmp.root_value(); + assert_eq!(value, lazysmp_value, "search depth={}\n{}", max_depth, b); } } } From d50e0041174cfe01433d28c2a088d9ba22f2ab2d Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 12 Mar 2021 23:13:42 -0800 Subject: [PATCH 055/162] Experimental unsafe table that doesn't 
bother to use atomics. Totally removes the lock contention from ParallelYbw, and it still passes the tests. --- src/strategies/table.rs | 108 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 93835a2..2c1f6d0 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -226,3 +226,111 @@ where self.concurrent_store(hash, best, depth, flag, best_move); } } + +// A concurrent table that doesn't bother to use atomic operations to access its entries. +// It's crazily unsafe, but somehow StockFish gets away with this? +pub(super) struct RacyTable { + table: Vec>, + mask: usize, + // Incremented for each iterative deepening run. + // Values from old generations are always overwritten. + generation: AtomicU8, +} + +#[allow(dead_code)] +impl RacyTable { + pub(super) fn new(table_byte_size: usize) -> Self { + let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); + let mask = size - 1; + let mut table = Vec::with_capacity(size); + for _ in 0..size { + table.push(Entry:: { + hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + }); + } + Self { table, mask, generation: AtomicU8::new(0) } + } + + pub(super) fn concurrent_advance_generation(&self) { + self.generation.fetch_add(1, Ordering::SeqCst); + } +} + +impl Table for RacyTable { + fn lookup(&self, hash: u64) -> Option> { + self.concurrent_lookup(hash) + } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } +} + +impl Table for Arc> { + fn lookup(&self, hash: u64) -> Option> { + self.concurrent_lookup(hash) + } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } + fn 
advance_generation(&mut self) { + self.concurrent_advance_generation() + } +} + +#[allow(dead_code)] +impl RacyTable +where + M: Copy, +{ + pub(super) fn concurrent_lookup(&self, hash: u64) -> Option> { + let index = (hash as usize) & self.mask; + let entry = self.table[index]; + if hash == entry.hash { + return Some(entry); + } + None + } + + fn concurrent_store( + &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, + ) { + let table_gen = self.generation.load(Ordering::Relaxed); + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if entry.generation != table_gen || entry.depth <= depth { + #[allow(mutable_transmutes)] + let ptr = unsafe { std::mem::transmute::<&Entry, &mut Entry>(entry) }; + *ptr = Entry { + hash, + value, + depth, + flag, + generation: table_gen, + best_move: Some(best_move), + }; + } + } + + // Update table based on negamax results. + pub(super) fn concurrent_update( + &self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, best: Evaluation, + best_move: M, + ) { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.concurrent_store(hash, best, depth, flag, best_move); + } +} From 68b898ec9bfd207a80273e1bc5d5f199829e054a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 13 Mar 2021 10:25:58 -0800 Subject: [PATCH 056/162] Add less unsafe lock-free table with similar benchmarks. 
--- src/strategies/lazy_smp.rs | 8 +- src/strategies/table.rs | 261 ++++++++++++++++++++++++------------- src/strategies/ybw.rs | 4 +- 3 files changed, 178 insertions(+), 95 deletions(-) diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 42daf48..a708e26 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -106,7 +106,7 @@ where ::S: Clone, ::M: Copy + Eq, { - negamaxer: Negamaxer::M>>>, + negamaxer: Negamaxer::M>>>, command: Arc::S>>>, waiter: Arc, } @@ -172,8 +172,8 @@ where { max_depth: usize, max_time: Duration, - table: Arc::M>>, - negamaxer: Negamaxer::M>>>, + table: Arc::M>>, + negamaxer: Negamaxer::M>>>, command: Arc::S>>>, signal: Arc, @@ -210,7 +210,7 @@ where where E: 'static, { - let table = Arc::new(ConcurrentTable::new(opts.table_byte_size)); + let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); let command = Arc::new(Mutex::new(Command::Wait)); let signal = Arc::new(Condvar::new()); // start n-1 helper threads diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 2c1f6d0..2ae7baa 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -3,7 +3,7 @@ extern crate parking_lot; use crate::interface::*; use parking_lot::Mutex; use std::cmp::{max, min}; -use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; use std::sync::Arc; // Common transposition table stuff. @@ -31,6 +31,7 @@ fn test_entry_size() { // TODO: ratchet down assert!(std::mem::size_of::>() <= 24); assert!(std::mem::size_of::>>() <= 32); + assert_eq!(std::mem::size_of::>(), 20); } // A trait for a transposition table. The methods are mutual exclusion, but @@ -119,9 +120,41 @@ pub(super) trait Table { } } -// It would be nice to unify most of the implementation of the single-threaded -// and concurrent tables, but the methods need different signatures. 
-pub(super) struct ConcurrentTable { +pub(super) trait ConcurrentTable { + fn concurrent_store( + &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, + ); + fn concurrent_advance_generation(&self); + + // Update table based on negamax results. + fn concurrent_update( + &self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, best: Evaluation, + best_move: M, + ) { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.concurrent_store(hash, best, depth, flag, best_move); + } +} + +impl + ConcurrentTable> Table for Arc { + fn lookup(&self, hash: u64) -> Option> { + (**self).lookup(hash) + } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } +} + +pub(super) struct ShardedTable { table: Vec>>, mask: usize, // Incremented for each iterative deepening run. 
@@ -129,7 +162,8 @@ pub(super) struct ConcurrentTable { generation: AtomicU8, } -impl ConcurrentTable { +#[allow(dead_code)] +impl ShardedTable { pub(super) fn new(table_byte_size: usize) -> Self { let size = (table_byte_size / std::mem::size_of::>>()).next_power_of_two(); let mask = (size - 1) & !1; @@ -146,42 +180,10 @@ impl ConcurrentTable { } Self { table, mask, generation: AtomicU8::new(0) } } - - pub(super) fn concurrent_advance_generation(&self) { - self.generation.fetch_add(1, Ordering::SeqCst); - } -} - -impl Table for ConcurrentTable { - fn lookup(&self, hash: u64) -> Option> { - self.concurrent_lookup(hash) - } - fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { - self.concurrent_store(hash, value, depth, flag, best_move) - } - fn advance_generation(&mut self) { - self.concurrent_advance_generation() - } } -impl Table for Arc> { +impl Table for ShardedTable { fn lookup(&self, hash: u64) -> Option> { - self.concurrent_lookup(hash) - } - fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { - self.concurrent_store(hash, value, depth, flag, best_move) - } - fn advance_generation(&mut self) { - self.concurrent_advance_generation() - } -} - -impl ConcurrentTable -where - M: Copy, -{ - // Using two-tier table, look in the two adjacent slots - pub(super) fn concurrent_lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; for i in index..index + 2 { let entry = self.table[i].lock(); @@ -191,7 +193,15 @@ where } None } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } +} +impl ConcurrentTable for ShardedTable { fn concurrent_store( &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, ) { @@ -211,19 +221,8 @@ where *self.table[index + 1].lock() 
= new_entry; } - // Update table based on negamax results. - pub(super) fn concurrent_update( - &self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, best: Evaluation, - best_move: M, - ) { - let flag = if best <= alpha_orig { - EntryFlag::Upperbound - } else if best >= beta { - EntryFlag::Lowerbound - } else { - EntryFlag::Exact - }; - self.concurrent_store(hash, best, depth, flag, best_move); + fn concurrent_advance_generation(&self) { + self.generation.fetch_add(1, Ordering::SeqCst); } } @@ -255,15 +254,16 @@ impl RacyTable { } Self { table, mask, generation: AtomicU8::new(0) } } - - pub(super) fn concurrent_advance_generation(&self) { - self.generation.fetch_add(1, Ordering::SeqCst); - } } impl Table for RacyTable { fn lookup(&self, hash: u64) -> Option> { - self.concurrent_lookup(hash) + let index = (hash as usize) & self.mask; + let entry = self.table[index]; + if hash == entry.hash { + return Some(entry); + } + None } fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { self.concurrent_store(hash, value, depth, flag, best_move) @@ -273,10 +273,73 @@ impl Table for RacyTable { } } -impl Table for Arc> { +impl ConcurrentTable for RacyTable { + fn concurrent_store( + &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, + ) { + let table_gen = self.generation.load(Ordering::Relaxed); + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if entry.generation != table_gen || entry.depth <= depth { + #[allow(mutable_transmutes)] + let ptr = unsafe { std::mem::transmute::<&Entry, &mut Entry>(entry) }; + *ptr = Entry { + hash, + value, + depth, + flag, + generation: table_gen, + best_move: Some(best_move), + }; + } + } + + fn concurrent_advance_generation(&self) { + self.generation.fetch_add(1, Ordering::SeqCst); + } +} + +struct ConcurrentEntry { + high_hash: AtomicU32, + value: Evaluation, + depth: u8, + flag: EntryFlag, + generation: u8, + best_move: Option, 
+} + +pub(super) struct LockfreeTable { + table: Vec>, + mask: usize, + generation: AtomicU8, +} + +// Safe for cross-thread usage because of manual concurrency operations. +unsafe impl Sync for LockfreeTable {} + +impl Table for LockfreeTable { fn lookup(&self, hash: u64) -> Option> { - self.concurrent_lookup(hash) + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if (hash >> 32) as u32 == entry.high_hash.load(Ordering::SeqCst) { + // Copy contents + let ret = Some(Entry { + // No one reads the hash. + hash: 0, + value: entry.value, + depth: entry.depth, + flag: entry.flag, + generation: entry.generation, + best_move: entry.best_move, + }); + // Verify the hash hasn't changed during the copy. + if (hash >> 32) as u32 == entry.high_hash.load(Ordering::SeqCst) { + return ret; + } + } + None } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { self.concurrent_store(hash, value, depth, flag, best_move) } @@ -286,19 +349,29 @@ impl Table for Arc> { } #[allow(dead_code)] -impl RacyTable -where - M: Copy, -{ - pub(super) fn concurrent_lookup(&self, hash: u64) -> Option> { - let index = (hash as usize) & self.mask; - let entry = self.table[index]; - if hash == entry.hash { - return Some(entry); +impl LockfreeTable { + const WRITING_SENTINEL: u32 = 0xffff_ffff; + + pub(super) fn new(table_byte_size: usize) -> Self { + let size = + (table_byte_size / std::mem::size_of::>()).next_power_of_two(); + let mask = size - 1; + let mut table = Vec::with_capacity(size); + for _ in 0..size { + table.push(ConcurrentEntry:: { + high_hash: AtomicU32::new(0x5555_5555), + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + }); } - None + Self { table, mask, generation: AtomicU8::new(0) } } +} +impl ConcurrentTable for LockfreeTable { fn concurrent_store( &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, ) { @@ -306,31 +379,41 @@ where let index = (hash as 
usize) & self.mask; let entry = &self.table[index]; if entry.generation != table_gen || entry.depth <= depth { + // Set hash to sentinel value during write. + let x = entry.high_hash.load(Ordering::SeqCst); + if x == Self::WRITING_SENTINEL { + // Someone's already writing, just forget it. + return; + } + // Try to set to sentinel value: + if entry.high_hash.compare_exchange_weak( + x, + Self::WRITING_SENTINEL, + Ordering::SeqCst, + Ordering::Relaxed, + ).is_err() { + // Someone just started writing, just forget it. + return; + } + + // concurrent_lookup will throw out any read that occurs across a write. + // Unless it's a write of the same hash, but close enough. #[allow(mutable_transmutes)] - let ptr = unsafe { std::mem::transmute::<&Entry, &mut Entry>(entry) }; - *ptr = Entry { - hash, - value, - depth, - flag, - generation: table_gen, - best_move: Some(best_move), + let entry = unsafe { + std::mem::transmute::<&ConcurrentEntry, &mut ConcurrentEntry>(entry) }; + entry.value = value; + entry.depth = depth; + entry.flag = flag; + entry.generation = table_gen; + entry.best_move = Some(best_move); + + // Set hash to correct value to indicate done. + entry.high_hash.store((hash >> 32) as u32, Ordering::SeqCst); } } - // Update table based on negamax results. 
- pub(super) fn concurrent_update( - &self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, best: Evaluation, - best_move: M, - ) { - let flag = if best <= alpha_orig { - EntryFlag::Upperbound - } else if best >= beta { - EntryFlag::Lowerbound - } else { - EntryFlag::Exact - }; - self.concurrent_store(hash, best, depth, flag, best_move); + fn concurrent_advance_generation(&self) { + self.generation.fetch_add(1, Ordering::SeqCst); } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index a2f70bd..ad76e8b 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -80,7 +80,7 @@ pub struct ParallelYbw { max_depth: usize, max_time: Duration, timeout: Arc, - table: ConcurrentTable<<::G as Game>::M>, + table: LockfreeTable<<::G as Game>::M>, //move_pool: MovePool<::M>, prev_value: Evaluation, eval: E, @@ -106,7 +106,7 @@ pub struct ParallelYbw { impl ParallelYbw { pub fn new(eval: E, opts: YbwOptions) -> ParallelYbw { - let table = ConcurrentTable::new(opts.table_byte_size); + let table = LockfreeTable::new(opts.table_byte_size); ParallelYbw { max_depth: 100, max_time: Duration::from_secs(5), From 3b0f2afce979e9178bfb9f224c7f5a6a055de2f7 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 19 Apr 2022 11:42:14 -0700 Subject: [PATCH 057/162] Shrink Evaluation to i16 to make transposition table entry smaller. Should fit 4 Entries to a cache line, even with a lock per entry. Parallel benchmark gets a little faster, negamax gets a little slower (?). 
--- examples/connect4.rs | 6 ++- src/interface.rs | 4 +- src/strategies/iterative.rs | 27 +++++------- src/strategies/lazy_smp.rs | 2 +- src/strategies/table.rs | 88 ++++++++++++++++++++++--------------- src/strategies/ybw.rs | 16 ++++--- tests/strategies.rs | 2 +- 7 files changed, 84 insertions(+), 61 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 918caf0..7294888 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -215,8 +215,10 @@ impl minimax::Evaluator for BasicEvaluator { let mut score = 0; // Bonus points for moves in the middle columns. for col in 2..5 { - score += ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; - score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; + score += + ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as minimax::Evaluation; + score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() + as minimax::Evaluation; } // Count columns that cause immediate win. diff --git a/src/interface.rs b/src/interface.rs index 3902820..732f3a3 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -3,14 +3,14 @@ /// An assessment of a game state from the perspective of the player whose turn it is to play. /// Higher values mean a more favorable state. /// A draw is defined as a score of zero. -pub type Evaluation = i32; +pub type Evaluation = i16; // These definitions ensure that they negate to each other, but it leaves // i32::MIN as a valid value less than WORST_EVAL. Don't use this value, and // any Strategy will panic when it tries to negate it. /// An absolutely wonderful outcome, e.g. a win. -pub const BEST_EVAL: Evaluation = i32::MAX; +pub const BEST_EVAL: Evaluation = i16::MAX; /// An absolutely disastrous outcome, e.g. a loss. 
pub const WORST_EVAL: Evaluation = -BEST_EVAL; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index ed5b613..4afaa25 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -10,6 +10,7 @@ use super::table::*; use super::util::*; use std::cmp::max; +use std::mem::MaybeUninit; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -23,7 +24,7 @@ pub enum Replacement { // TODO: Bucket(size) } -struct TranspositionTable { +struct TranspositionTable { table: Vec>, mask: usize, // Incremented for each iterative deepening run. @@ -38,14 +39,7 @@ impl TranspositionTable { let mask = if strategy == Replacement::TwoTier { (size - 1) & !1 } else { size - 1 }; let mut table = Vec::with_capacity(size); for _ in 0..size { - table.push(Entry:: { - hash: 0, - value: 0, - depth: 0, - flag: EntryFlag::Exact, - generation: 0, - best_move: None, - }); + table.push(Entry::empty()); } Self { table, mask, generation: 0, strategy } } @@ -55,11 +49,11 @@ impl Table for TranspositionTable { fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = &self.table[index]; - if hash == entry.hash { + if high_hash_bits(hash) == entry.hash { Some(*entry) } else if self.strategy == Replacement::TwoTier { let entry = &self.table[index + 1]; - if hash == entry.hash { + if high_hash_bits(hash) == entry.hash { Some(*entry) } else { None @@ -94,12 +88,12 @@ impl Table for TranspositionTable { }; if let Some(index) = dest { self.table[index] = Entry { - hash, + hash: high_hash_bits(hash), value, depth, flag, generation: self.generation, - best_move: Some(best_move), + best_move: MaybeUninit::new(best_move), } } } @@ -366,7 +360,10 @@ where } } -pub struct IterativeSearch { +pub struct IterativeSearch +where + <::G as Game>::M: Copy, +{ max_depth: usize, max_time: Duration, negamaxer: Negamaxer::M>>, @@ -483,7 +480,7 @@ where break; } let entry = 
self.negamaxer.table.lookup(root_hash).unwrap(); - best_move = entry.best_move; + best_move = Some(entry.best_move()); self.actual_depth = max(self.actual_depth, depth); self.nodes_explored.push(self.negamaxer.nodes_explored); diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 42daf48..26009f2 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -338,7 +338,7 @@ where } let entry = self.table.lookup(root_hash).unwrap(); - best_move = entry.best_move; + best_move = Some(entry.best_move()); self.actual_depth = max(self.actual_depth, depth); self.prev_value = entry.value; diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 2c1f6d0..f78e33c 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -3,6 +3,7 @@ extern crate parking_lot; use crate::interface::*; use parking_lot::Mutex; use std::cmp::{max, min}; +use std::mem::MaybeUninit; use std::sync::atomic::{AtomicU8, Ordering}; use std::sync::Arc; @@ -15,22 +16,47 @@ pub(super) enum EntryFlag { Lowerbound, } -// TODO: Optimize size. Ideally 16 bytes or less. #[derive(Copy, Clone)] -pub(super) struct Entry { - pub(super) hash: u64, +pub(super) struct Entry { + // High bits of hash. Low bits are used in table index. + pub(super) hash: u32, pub(super) value: Evaluation, pub(super) depth: u8, pub(super) flag: EntryFlag, pub(super) generation: u8, - pub(super) best_move: Option, + // Always initialized when entry is populated. + pub(super) best_move: MaybeUninit, } #[test] fn test_entry_size() { - // TODO: ratchet down - assert!(std::mem::size_of::>() <= 24); - assert!(std::mem::size_of::>>() <= 32); + // Even with the mutex, we want this to be within 16 bytes so 4 can fit on a cache line. + // 3 byte move allows enum Moves with 2 bytes of payload. 
+ assert!(std::mem::size_of::>() <= 16); + assert!(std::mem::size_of::>>() <= 16); +} + +impl Entry { + pub(super) fn empty() -> Self { + Entry { + hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: MaybeUninit::uninit(), + } + } + + pub(super) fn best_move(&self) -> M { + debug_assert!(self.hash != 0); + unsafe { self.best_move.assume_init() } + } +} + +pub(super) fn high_hash_bits(hash: u64) -> u32 { + // Always set the bottom bit to ensure no one matches the zero hash. + (hash >> 32) as u32 | 1 } // A trait for a transposition table. The methods are mutual exclusion, but @@ -48,7 +74,7 @@ pub(super) trait Table { beta: &mut Evaluation, ) -> Option { if let Some(entry) = self.lookup(hash) { - *good_move = entry.best_move; + *good_move = Some(entry.best_move()); if entry.depth >= depth { match entry.flag { EntryFlag::Exact => { @@ -102,7 +128,7 @@ pub(super) trait Table { // value to be exact, and we can't guarantee that the table entry // will remain in the table between the searches that find // equivalent upper and lower bounds. - let m = entry.best_move.unwrap(); + let m = entry.best_move(); pv.push(m); m.apply(s); hash = s.zobrist_hash(); @@ -121,7 +147,7 @@ pub(super) trait Table { // It would be nice to unify most of the implementation of the single-threaded // and concurrent tables, but the methods need different signatures. -pub(super) struct ConcurrentTable { +pub(super) struct ConcurrentTable { table: Vec>>, mask: usize, // Incremented for each iterative deepening run. 
@@ -129,20 +155,13 @@ pub(super) struct ConcurrentTable { generation: AtomicU8, } -impl ConcurrentTable { +impl ConcurrentTable { pub(super) fn new(table_byte_size: usize) -> Self { let size = (table_byte_size / std::mem::size_of::>>()).next_power_of_two(); let mask = (size - 1) & !1; let mut table = Vec::with_capacity(size); for _ in 0..size { - table.push(Mutex::new(Entry:: { - hash: 0, - value: 0, - depth: 0, - flag: EntryFlag::Exact, - generation: 0, - best_move: None, - })); + table.push(Mutex::new(Entry::empty())); } Self { table, mask, generation: AtomicU8::new(0) } } @@ -185,7 +204,7 @@ where let index = (hash as usize) & self.mask; for i in index..index + 2 { let entry = self.table[i].lock(); - if hash == entry.hash { + if high_hash_bits(hash) == entry.hash { return Some(*entry); } } @@ -198,8 +217,14 @@ where let table_gen = self.generation.load(Ordering::Relaxed); // index points to the first of a pair of entries, the depth-preferred entry and the always-replace entry. let index = (hash as usize) & self.mask; - let new_entry = - Entry { hash, value, depth, flag, generation: table_gen, best_move: Some(best_move) }; + let new_entry = Entry { + hash: high_hash_bits(hash), + value, + depth, + flag, + generation: table_gen, + best_move: MaybeUninit::new(best_move), + }; { let mut entry = self.table[index].lock(); if entry.generation != table_gen || entry.depth <= depth { @@ -229,7 +254,7 @@ where // A concurrent table that doesn't bother to use atomic operations to access its entries. // It's crazily unsafe, but somehow StockFish gets away with this? -pub(super) struct RacyTable { +pub(super) struct RacyTable { table: Vec>, mask: usize, // Incremented for each iterative deepening run. 
@@ -238,20 +263,13 @@ pub(super) struct RacyTable { } #[allow(dead_code)] -impl RacyTable { +impl RacyTable { pub(super) fn new(table_byte_size: usize) -> Self { let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); let mask = size - 1; let mut table = Vec::with_capacity(size); for _ in 0..size { - table.push(Entry:: { - hash: 0, - value: 0, - depth: 0, - flag: EntryFlag::Exact, - generation: 0, - best_move: None, - }); + table.push(Entry::empty()); } Self { table, mask, generation: AtomicU8::new(0) } } @@ -293,7 +311,7 @@ where pub(super) fn concurrent_lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = self.table[index]; - if hash == entry.hash { + if high_hash_bits(hash) == entry.hash { return Some(entry); } None @@ -309,12 +327,12 @@ where #[allow(mutable_transmutes)] let ptr = unsafe { std::mem::transmute::<&Entry, &mut Entry>(entry) }; *ptr = Entry { - hash, + hash: high_hash_bits(hash), value, depth, flag, generation: table_gen, - best_move: Some(best_move), + best_move: MaybeUninit::new(best_move), }; } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index a2f70bd..e4f9f4f 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -13,7 +13,7 @@ use super::util::*; use rayon::prelude::*; use std::cmp::max; -use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI16, Ordering}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; @@ -76,7 +76,10 @@ impl YbwOptions { } } -pub struct ParallelYbw { +pub struct ParallelYbw +where + <::G as Game>::M: Copy, +{ max_depth: usize, max_time: Duration, timeout: Arc, @@ -104,7 +107,10 @@ pub struct ParallelYbw { wall_time: Duration, } -impl ParallelYbw { +impl ParallelYbw +where + ::M: Copy, +{ pub fn new(eval: E, opts: YbwOptions) -> ParallelYbw { let table = ConcurrentTable::new(opts.table_byte_size); ParallelYbw { @@ -296,7 +302,7 @@ impl ParallelYbw { } (best, best_move) } else { 
- let alpha = AtomicI32::new(alpha); + let alpha = AtomicI16::new(alpha); let best_move = Mutex::new(ValueMove::new(initial_value, first_move)); // Parallel search let result = moves.par_iter().with_max_len(1).try_for_each(|&m| -> Option<()> { @@ -380,7 +386,7 @@ where break; } let entry = self.table.lookup(root_hash).unwrap(); - best_move = entry.best_move; + best_move = Some(entry.best_move()); self.actual_depth = max(self.actual_depth, depth); self.nodes_explored.push(self.next_depth_nodes); diff --git a/tests/strategies.rs b/tests/strategies.rs index 88d998b..a91b775 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -98,7 +98,7 @@ impl minimax::Evaluator for RandomEvaluator { hasher.write_u64(b.pieces_to_move); let hash = hasher.finish(); // Use fewer bits so that we get some equal values. - (hash as minimax::Evaluation) >> 25 + (hash as minimax::Evaluation) >> 9 } } From cc9bf23ff6b9da86b9fb8f4ce108cab0e0989e21 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 19 Apr 2022 11:47:02 -0700 Subject: [PATCH 058/162] Export Random strategy. --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 7392328..3ae875d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,5 +10,6 @@ pub use interface::{ pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; pub use strategies::lazy_smp::{LazySmp, LazySmpOptions}; pub use strategies::negamax::Negamax; +pub use strategies::random::Random; pub use strategies::ybw::{ParallelYbw, YbwOptions}; pub use util::perft; From 7078ea7a6eab0a7678734ca69a614e4d94de70a0 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 19 Apr 2022 15:21:57 -0700 Subject: [PATCH 059/162] Add multi-threaded perft option. 
--- examples/connect4.rs | 2 +- src/util.rs | 40 +++++++++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 7294888..178c9f4 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -258,7 +258,7 @@ fn main() { let mut b = Board::default(); if std::env::args().any(|arg| arg == "perft") { - perft::(&mut b, 10); + perft::(&mut b, 10, false); return; } diff --git a/src/util.rs b/src/util.rs index 61cdcd6..b465d46 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,7 +1,11 @@ //! Utility functions for testing, and tests. +extern crate rayon; + use super::interface; use super::interface::{Game, Move}; + +use rayon::prelude::*; use std::default::Default; use std::time::Instant; @@ -55,7 +59,13 @@ impl MovePool { } } -fn perft_recurse(pool: &mut MovePool, state: &mut G::S, depth: usize) -> u64 { +fn perft_recurse( + pool: &mut MovePool, state: &mut G::S, depth: usize, single_thread_cutoff: usize, +) -> u64 +where + ::S: Clone + Sync, + ::M: Copy + Sync, +{ if depth == 0 { return 1; } @@ -67,26 +77,46 @@ fn perft_recurse(pool: &mut MovePool, state: &mut G::S, depth: us G::generate_moves(state, &mut moves); let n = if depth == 1 { moves.len() as u64 - } else { + } else if depth <= single_thread_cutoff { + // Single-thread recurse. let mut count = 0; for m in moves.iter() { m.apply(state); - count += perft_recurse::(pool, state, depth - 1); + count += perft_recurse::(pool, state, depth - 1, single_thread_cutoff); m.undo(state); } count + } else { + // Multi-thread recurse. 
+ moves + .par_iter() + .with_max_len(1) + .map(|&m| { + let mut state2 = state.clone(); + let mut pool2 = MovePool::::default(); + m.apply(&mut state2); + perft_recurse::(&mut pool2, &mut state2, depth - 1, single_thread_cutoff) + }) + .sum() }; pool.free(moves); n } -pub fn perft(state: &mut ::S, max_depth: usize) -> Vec { +pub fn perft( + state: &mut ::S, max_depth: usize, multi_threaded: bool, +) -> Vec +where + ::S: Clone + Sync, + ::M: Copy + Sync, +{ println!("depth count time kn/s"); let mut pool = MovePool::::default(); let mut counts = Vec::new(); + let single_thread_cutoff = if multi_threaded { 3 } else { max_depth }; for depth in 0..max_depth + 1 { let start = Instant::now(); - let count = perft_recurse::(&mut pool, state, depth); + let count = perft_recurse::(&mut pool, state, depth, single_thread_cutoff); let dur = start.elapsed(); let rate = count as f64 / dur.as_secs_f64() / 1000.0; println!("{:>5} {:>15} {:>11} {:>11.1}", depth, count, format!("{:.1?}", dur), rate); From 06958a1afa75072d8199d8b544baaf7bcaa5d97d Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 21 Apr 2022 22:30:47 -0700 Subject: [PATCH 060/162] New MCTS strategy. Beats Random. Multiple threads coming. 
--- src/lib.rs | 1 + src/strategies/mcts.rs | 201 +++++++++++++++++++++++++++++++++++++++++ src/strategies/mod.rs | 1 + src/strategies/util.rs | 54 ++++++++++- tests/strategies.rs | 2 +- tests/ttt.rs | 12 ++- 6 files changed, 267 insertions(+), 4 deletions(-) create mode 100644 src/strategies/mcts.rs diff --git a/src/lib.rs b/src/lib.rs index 3ae875d..ffbbd3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ pub use interface::{ }; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; pub use strategies::lazy_smp::{LazySmp, LazySmpOptions}; +pub use strategies::mcts::MonteCarloTreeSearch; pub use strategies::negamax::Negamax; pub use strategies::random::Random; pub use strategies::ybw::{ParallelYbw, YbwOptions}; diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs new file mode 100644 index 0000000..011a743 --- /dev/null +++ b/src/strategies/mcts.rs @@ -0,0 +1,201 @@ +use super::super::interface::*; +use super::util::AtomicBox; + +use rand::seq::SliceRandom; +use rand::Rng; +use std::sync::atomic::{AtomicI32, AtomicU32, Ordering}; + +struct Node { + // The Move to get from the parent to here. + // Only None at the root. + m: Option, + visits: AtomicU32, + // +1 for wins, -1 for losses, +0 for draws. + // From perspective of player to move. + score: AtomicI32, + // Lazily populated. + expansion: AtomicBox>, +} + +struct NodeExpansion { + // Populated if this node is an end state. 
+ winner: Option, + children: Vec>, +} + +fn new_expansion(state: &G::S) -> Box> { + let winner = G::get_winner(state); + let children = if winner.is_some() { + Vec::new() + } else { + let mut moves = Vec::new(); + G::generate_moves(state, &mut moves); + moves.into_iter().map(|m| Node::new(Some(m))).collect::>() + }; + Box::new(NodeExpansion { winner, children }) +} + +impl Node { + fn new(m: Option) -> Self { + Node { + m, + expansion: AtomicBox::default(), + visits: AtomicU32::new(0), + score: AtomicI32::new(0), + } + } + + // Choose best child based on UCT. + fn best_child(&self, exploration_score: f32) -> Option<&Node> { + let log_visits = (self.visits.load(Ordering::SeqCst) as f32).log2(); + let expansion = self.expansion.get()?; + // Find a node, randomly chosen among the best scores. + // TODO: make it more uniformly random? + let n = expansion.children.len(); + let mut i = rand::thread_rng().gen_range(0..n); + let mut best_score = f32::NEG_INFINITY; + let mut best_child = None; + for _ in 0..n { + let score = expansion.children[i].uct_score(exploration_score, log_visits); + debug_assert!(!score.is_nan()); + if score > best_score { + best_score = score; + best_child = Some(&expansion.children[i]); + } + i = (i + 1) % n; + } + best_child + } + + fn uct_score(&self, exploration_score: f32, log_parent_visits: f32) -> f32 { + let visits = self.visits.load(Ordering::Relaxed) as f32; + let score = self.score.load(Ordering::Relaxed) as f32; + if visits == 0.0 { + // Avoid NaNs. 
+ return if exploration_score > 0.0 { f32::INFINITY } else { 0.0 }; + } + let win_ratio = (score + visits) / (2.0 * visits); + win_ratio + exploration_score * (2.0 * log_parent_visits / visits).sqrt() + } +} + +pub struct MonteCarloTreeSearch { + // TODO: Evaluator + + // Config + max_rollout_depth: usize, + max_rollouts: u32, + //max_time: Duration, + // TODO: rollouts_per_node + // TODO: num_threads +} + +impl MonteCarloTreeSearch { + pub fn new() -> Self { + Self { max_rollout_depth: 200, max_rollouts: 100 } + } + + // Returns score for this node. +1 for win of original player to move. + // TODO: policy options: random, look 1 ahead for winning moves, BYO Evaluator. + fn rollout(&self, s: &G::S) -> i32 + where + G::S: Clone, + { + let mut rng = rand::thread_rng(); + let mut depth = self.max_rollout_depth; + let mut state = s.clone(); + let mut moves = Vec::new(); + let mut sign = 1; + loop { + if let Some(winner) = G::get_winner(&state) { + return match winner { + Winner::PlayerJustMoved => 1, + Winner::PlayerToMove => -1, + Winner::Draw => 0, + } * sign; + } + + if depth == 0 { + return 0; + } + + moves.clear(); + G::generate_moves(s, &mut moves); + let m = moves.choose(&mut rng).unwrap(); + m.apply(&mut state); + sign = -sign; + depth -= 1; + } + } + + // Explore the tree, make a new node, rollout, backpropagate. + fn simulate(&self, node: &Node, state: &mut G::S, mut force_rollout: bool) -> i32 + where + G::S: Clone, + { + if force_rollout { + let result = self.rollout::(state); + + // Backpropagate. + node.visits.fetch_add(1, Ordering::SeqCst); + node.score.fetch_add(result, Ordering::SeqCst); + return result; + } + + let expansion = node.expansion.get().unwrap_or_else(|| { + // Expand this node, and force a rollout when we recurse. 
+ force_rollout = true; + node.expansion.try_set(new_expansion::(state)) + }); + + if let Some(winner) = expansion.winner { + let result = match winner { + Winner::PlayerJustMoved => 1, + Winner::PlayerToMove => -1, + Winner::Draw => 0, + }; + + // Backpropagate. + node.visits.fetch_add(1, Ordering::SeqCst); + node.score.fetch_add(result, Ordering::SeqCst); + return result; + } + + // Recurse. + let next = node.best_child(1.).unwrap(); + let m = next.m.as_ref().unwrap(); + m.apply(state); + let result = -self.simulate::(next, state, force_rollout); + m.undo(state); + + // Backpropagate. + node.visits.fetch_add(1, Ordering::SeqCst); + node.score.fetch_add(result, Ordering::SeqCst); + result + } +} + +impl Strategy for MonteCarloTreeSearch +where + G::S: Clone, + G::M: Copy, +{ + fn choose_move(&mut self, s: &G::S) -> Option { + let root = Node::::new(None); + root.expansion.try_set(new_expansion::(s)); + let mut state = s.clone(); + for _ in 0..self.max_rollouts { + self.simulate::(&root, &mut state, false); + } + debug_assert_eq!(self.max_rollouts, root.visits.load(Ordering::SeqCst)); + let exploration = 0.0; // Just get best node. + root.best_child(exploration).map(|node| node.m.unwrap()) + } +} + +mod tests { + // TODO: make a fake game with branching_factor=1 to test correct signage of results. 
+ // TODO: make a game with branching_factor=2: add or subtract to shared total + + // or maybe just run tic tac toe against random many times and check that it always wins +} diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 2b8664e..b6b4b58 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -2,6 +2,7 @@ pub mod iterative; pub mod lazy_smp; +pub mod mcts; pub mod negamax; pub mod random; pub mod ybw; diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 9f86195..18ca56f 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -1,6 +1,6 @@ use super::super::interface::*; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; use std::sync::Arc; use std::thread::{sleep, spawn}; use std::time::Duration; @@ -63,3 +63,55 @@ impl ValueMove { (self.value, self.m) } } + +// An insert-only lock-free Option> +pub(super) struct AtomicBox(AtomicPtr); + +impl Default for AtomicBox { + fn default() -> Self { + Self(AtomicPtr::default()) + } +} + +impl AtomicBox { + // Tries to set the AtomicBox to this value if empty. + // Returns a reference to whatever is in the box. + pub(super) fn try_set(&self, value: Box) -> &T { + let ptr = Box::into_raw(value); + // Try to replace nullptr with the value. + let ret_ptr = if let Err(new_ptr) = + self.0.compare_exchange(std::ptr::null_mut(), ptr, Ordering::SeqCst, Ordering::SeqCst) + { + // If someone beat us to it, return the original drop the new one. 
+ unsafe { drop(Box::from_raw(ptr)) }; + new_ptr + } else { + ptr + }; + unsafe { ret_ptr.as_ref().unwrap() } + } + + pub(super) fn get(&self) -> Option<&T> { + let ptr = self.0.load(Ordering::Relaxed); + unsafe { ptr.as_ref() } + } +} + +impl Drop for AtomicBox { + fn drop(&mut self) { + let ptr = *self.0.get_mut(); + if !ptr.is_null() { + unsafe { drop(Box::from_raw(ptr)) }; + } + } +} + +#[test] +fn test_atomic_box() { + let b = AtomicBox::::default(); + assert_eq!(None, b.get()); + b.try_set(Box::new(3)); + assert_eq!(Some(&3), b.get()); + b.try_set(Box::new(4)); + assert_eq!(Some(&3), b.get()); +} diff --git a/tests/strategies.rs b/tests/strategies.rs index a91b775..d7a4765 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -1,4 +1,4 @@ -// For a given Evaluator and depth, all Strategies should produce the same +// For a given Evaluator and depth, all negamax Strategies should produce the same // value for the root. They use different techniques and pruning heuristics // for speed, but it's all fundamentally the minimax algorithm. This file // creates fake evaluation trees of connect four, and ensures that all diff --git a/tests/ttt.rs b/tests/ttt.rs index db662fb..db67f26 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -4,7 +4,7 @@ extern crate minimax; mod ttt; use minimax::util::battle_royale; -use minimax::Negamax; +use minimax::{MonteCarloTreeSearch, Negamax, Random}; // Ensure that two players using negamax always results in a draw. #[test] @@ -20,10 +20,18 @@ fn test_ttt_negamax_always_draws() { // either a draw or a win for the former player. 
#[test] fn test_ttt_negamax_vs_random_always_wins_or_draws() { - use minimax::strategies::random::Random; let mut s1 = Negamax::new(ttt::Evaluator::default(), 10); let mut s2 = Random::new(); for _ in 0..100 { assert_ne!(battle_royale(&mut s1, &mut s2), Some(1)); } } + +#[test] +fn test_ttt_mcts_vs_random_always_wins_or_draws() { + let mut s1 = MonteCarloTreeSearch::new(); + let mut s2 = Random::new(); + for _ in 0..100 { + assert_ne!(battle_royale::(&mut s1, &mut s2), Some(1)); + } +} From 2fc91daf1a3c3042c11fc363b3939e87cf7b8187 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 22 Apr 2022 11:23:00 -0700 Subject: [PATCH 061/162] Revert "Shrink Evaluation to i16 to make transposition table entry smaller." This reverts commit 3b0f2afce979e9178bfb9f224c7f5a6a055de2f7. Turns out I already implemented a better way to do this on another machine and forgot to push it. --- examples/connect4.rs | 6 +-- src/interface.rs | 4 +- src/strategies/iterative.rs | 27 +++++++----- src/strategies/lazy_smp.rs | 2 +- src/strategies/table.rs | 88 +++++++++++++++---------------------- src/strategies/ybw.rs | 16 +++---- tests/strategies.rs | 2 +- 7 files changed, 61 insertions(+), 84 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 178c9f4..3784134 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -215,10 +215,8 @@ impl minimax::Evaluator for BasicEvaluator { let mut score = 0; // Bonus points for moves in the middle columns. for col in 2..5 { - score += - ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as minimax::Evaluation; - score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() - as minimax::Evaluation; + score += ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; + score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; } // Count columns that cause immediate win. 
diff --git a/src/interface.rs b/src/interface.rs index 732f3a3..3902820 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -3,14 +3,14 @@ /// An assessment of a game state from the perspective of the player whose turn it is to play. /// Higher values mean a more favorable state. /// A draw is defined as a score of zero. -pub type Evaluation = i16; +pub type Evaluation = i32; // These definitions ensure that they negate to each other, but it leaves // i32::MIN as a valid value less than WORST_EVAL. Don't use this value, and // any Strategy will panic when it tries to negate it. /// An absolutely wonderful outcome, e.g. a win. -pub const BEST_EVAL: Evaluation = i16::MAX; +pub const BEST_EVAL: Evaluation = i32::MAX; /// An absolutely disastrous outcome, e.g. a loss. pub const WORST_EVAL: Evaluation = -BEST_EVAL; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 4afaa25..ed5b613 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -10,7 +10,6 @@ use super::table::*; use super::util::*; use std::cmp::max; -use std::mem::MaybeUninit; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -24,7 +23,7 @@ pub enum Replacement { // TODO: Bucket(size) } -struct TranspositionTable { +struct TranspositionTable { table: Vec>, mask: usize, // Incremented for each iterative deepening run. 
@@ -39,7 +38,14 @@ impl TranspositionTable { let mask = if strategy == Replacement::TwoTier { (size - 1) & !1 } else { size - 1 }; let mut table = Vec::with_capacity(size); for _ in 0..size { - table.push(Entry::empty()); + table.push(Entry:: { + hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + }); } Self { table, mask, generation: 0, strategy } } @@ -49,11 +55,11 @@ impl Table for TranspositionTable { fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = &self.table[index]; - if high_hash_bits(hash) == entry.hash { + if hash == entry.hash { Some(*entry) } else if self.strategy == Replacement::TwoTier { let entry = &self.table[index + 1]; - if high_hash_bits(hash) == entry.hash { + if hash == entry.hash { Some(*entry) } else { None @@ -88,12 +94,12 @@ impl Table for TranspositionTable { }; if let Some(index) = dest { self.table[index] = Entry { - hash: high_hash_bits(hash), + hash, value, depth, flag, generation: self.generation, - best_move: MaybeUninit::new(best_move), + best_move: Some(best_move), } } } @@ -360,10 +366,7 @@ where } } -pub struct IterativeSearch -where - <::G as Game>::M: Copy, -{ +pub struct IterativeSearch { max_depth: usize, max_time: Duration, negamaxer: Negamaxer::M>>, @@ -480,7 +483,7 @@ where break; } let entry = self.negamaxer.table.lookup(root_hash).unwrap(); - best_move = Some(entry.best_move()); + best_move = entry.best_move; self.actual_depth = max(self.actual_depth, depth); self.nodes_explored.push(self.negamaxer.nodes_explored); diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 26009f2..42daf48 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -338,7 +338,7 @@ where } let entry = self.table.lookup(root_hash).unwrap(); - best_move = Some(entry.best_move()); + best_move = entry.best_move; self.actual_depth = max(self.actual_depth, depth); self.prev_value = entry.value; diff --git 
a/src/strategies/table.rs b/src/strategies/table.rs index f78e33c..2c1f6d0 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -3,7 +3,6 @@ extern crate parking_lot; use crate::interface::*; use parking_lot::Mutex; use std::cmp::{max, min}; -use std::mem::MaybeUninit; use std::sync::atomic::{AtomicU8, Ordering}; use std::sync::Arc; @@ -16,47 +15,22 @@ pub(super) enum EntryFlag { Lowerbound, } +// TODO: Optimize size. Ideally 16 bytes or less. #[derive(Copy, Clone)] -pub(super) struct Entry { - // High bits of hash. Low bits are used in table index. - pub(super) hash: u32, +pub(super) struct Entry { + pub(super) hash: u64, pub(super) value: Evaluation, pub(super) depth: u8, pub(super) flag: EntryFlag, pub(super) generation: u8, - // Always initialized when entry is populated. - pub(super) best_move: MaybeUninit, + pub(super) best_move: Option, } #[test] fn test_entry_size() { - // Even with the mutex, we want this to be within 16 bytes so 4 can fit on a cache line. - // 3 byte move allows enum Moves with 2 bytes of payload. - assert!(std::mem::size_of::>() <= 16); - assert!(std::mem::size_of::>>() <= 16); -} - -impl Entry { - pub(super) fn empty() -> Self { - Entry { - hash: 0, - value: 0, - depth: 0, - flag: EntryFlag::Exact, - generation: 0, - best_move: MaybeUninit::uninit(), - } - } - - pub(super) fn best_move(&self) -> M { - debug_assert!(self.hash != 0); - unsafe { self.best_move.assume_init() } - } -} - -pub(super) fn high_hash_bits(hash: u64) -> u32 { - // Always set the bottom bit to ensure no one matches the zero hash. - (hash >> 32) as u32 | 1 + // TODO: ratchet down + assert!(std::mem::size_of::>() <= 24); + assert!(std::mem::size_of::>>() <= 32); } // A trait for a transposition table. 
The methods are mutual exclusion, but @@ -74,7 +48,7 @@ pub(super) trait Table { beta: &mut Evaluation, ) -> Option { if let Some(entry) = self.lookup(hash) { - *good_move = Some(entry.best_move()); + *good_move = entry.best_move; if entry.depth >= depth { match entry.flag { EntryFlag::Exact => { @@ -128,7 +102,7 @@ pub(super) trait Table { // value to be exact, and we can't guarantee that the table entry // will remain in the table between the searches that find // equivalent upper and lower bounds. - let m = entry.best_move(); + let m = entry.best_move.unwrap(); pv.push(m); m.apply(s); hash = s.zobrist_hash(); @@ -147,7 +121,7 @@ pub(super) trait Table { // It would be nice to unify most of the implementation of the single-threaded // and concurrent tables, but the methods need different signatures. -pub(super) struct ConcurrentTable { +pub(super) struct ConcurrentTable { table: Vec>>, mask: usize, // Incremented for each iterative deepening run. @@ -155,13 +129,20 @@ pub(super) struct ConcurrentTable { generation: AtomicU8, } -impl ConcurrentTable { +impl ConcurrentTable { pub(super) fn new(table_byte_size: usize) -> Self { let size = (table_byte_size / std::mem::size_of::>>()).next_power_of_two(); let mask = (size - 1) & !1; let mut table = Vec::with_capacity(size); for _ in 0..size { - table.push(Mutex::new(Entry::empty())); + table.push(Mutex::new(Entry:: { + hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + })); } Self { table, mask, generation: AtomicU8::new(0) } } @@ -204,7 +185,7 @@ where let index = (hash as usize) & self.mask; for i in index..index + 2 { let entry = self.table[i].lock(); - if high_hash_bits(hash) == entry.hash { + if hash == entry.hash { return Some(*entry); } } @@ -217,14 +198,8 @@ where let table_gen = self.generation.load(Ordering::Relaxed); // index points to the first of a pair of entries, the depth-preferred entry and the always-replace entry. 
let index = (hash as usize) & self.mask; - let new_entry = Entry { - hash: high_hash_bits(hash), - value, - depth, - flag, - generation: table_gen, - best_move: MaybeUninit::new(best_move), - }; + let new_entry = + Entry { hash, value, depth, flag, generation: table_gen, best_move: Some(best_move) }; { let mut entry = self.table[index].lock(); if entry.generation != table_gen || entry.depth <= depth { @@ -254,7 +229,7 @@ where // A concurrent table that doesn't bother to use atomic operations to access its entries. // It's crazily unsafe, but somehow StockFish gets away with this? -pub(super) struct RacyTable { +pub(super) struct RacyTable { table: Vec>, mask: usize, // Incremented for each iterative deepening run. @@ -263,13 +238,20 @@ pub(super) struct RacyTable { } #[allow(dead_code)] -impl RacyTable { +impl RacyTable { pub(super) fn new(table_byte_size: usize) -> Self { let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); let mask = size - 1; let mut table = Vec::with_capacity(size); for _ in 0..size { - table.push(Entry::empty()); + table.push(Entry:: { + hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + }); } Self { table, mask, generation: AtomicU8::new(0) } } @@ -311,7 +293,7 @@ where pub(super) fn concurrent_lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = self.table[index]; - if high_hash_bits(hash) == entry.hash { + if hash == entry.hash { return Some(entry); } None @@ -327,12 +309,12 @@ where #[allow(mutable_transmutes)] let ptr = unsafe { std::mem::transmute::<&Entry, &mut Entry>(entry) }; *ptr = Entry { - hash: high_hash_bits(hash), + hash, value, depth, flag, generation: table_gen, - best_move: MaybeUninit::new(best_move), + best_move: Some(best_move), }; } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index e4f9f4f..a2f70bd 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -13,7 +13,7 @@ use super::util::*; 
use rayon::prelude::*; use std::cmp::max; -use std::sync::atomic::{AtomicBool, AtomicI16, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; @@ -76,10 +76,7 @@ impl YbwOptions { } } -pub struct ParallelYbw -where - <::G as Game>::M: Copy, -{ +pub struct ParallelYbw { max_depth: usize, max_time: Duration, timeout: Arc, @@ -107,10 +104,7 @@ where wall_time: Duration, } -impl ParallelYbw -where - ::M: Copy, -{ +impl ParallelYbw { pub fn new(eval: E, opts: YbwOptions) -> ParallelYbw { let table = ConcurrentTable::new(opts.table_byte_size); ParallelYbw { @@ -302,7 +296,7 @@ where } (best, best_move) } else { - let alpha = AtomicI16::new(alpha); + let alpha = AtomicI32::new(alpha); let best_move = Mutex::new(ValueMove::new(initial_value, first_move)); // Parallel search let result = moves.par_iter().with_max_len(1).try_for_each(|&m| -> Option<()> { @@ -386,7 +380,7 @@ where break; } let entry = self.table.lookup(root_hash).unwrap(); - best_move = Some(entry.best_move()); + best_move = entry.best_move; self.actual_depth = max(self.actual_depth, depth); self.nodes_explored.push(self.next_depth_nodes); diff --git a/tests/strategies.rs b/tests/strategies.rs index d7a4765..9e9c540 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -98,7 +98,7 @@ impl minimax::Evaluator for RandomEvaluator { hasher.write_u64(b.pieces_to_move); let hash = hasher.finish(); // Use fewer bits so that we get some equal values. - (hash as minimax::Evaluation) >> 9 + (hash as minimax::Evaluation) >> 25 } } From 33470fa8505a973a338573b55e216daabead54b9 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 22 Apr 2022 11:59:16 -0700 Subject: [PATCH 062/162] Ratchet down Entry size to 16 bytes for regular and concurrent tables. Also reduce read races in LockFreeTable when overwriting the same hash. 
--- src/strategies/iterative.rs | 8 ++--- src/strategies/table.rs | 67 ++++++++++++++++++++++++------------- src/util.rs | 2 +- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index ed5b613..903b258 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -39,7 +39,7 @@ impl TranspositionTable { let mut table = Vec::with_capacity(size); for _ in 0..size { table.push(Entry:: { - hash: 0, + high_hash: 0, value: 0, depth: 0, flag: EntryFlag::Exact, @@ -55,11 +55,11 @@ impl Table for TranspositionTable { fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = &self.table[index]; - if hash == entry.hash { + if high_bits(hash) == entry.high_hash { Some(*entry) } else if self.strategy == Replacement::TwoTier { let entry = &self.table[index + 1]; - if hash == entry.hash { + if high_bits(hash) == entry.high_hash { Some(*entry) } else { None @@ -94,7 +94,7 @@ impl Table for TranspositionTable { }; if let Some(index) = dest { self.table[index] = Entry { - hash, + high_hash: high_bits(hash), value, depth, flag, diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 2ae7baa..66b155d 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -15,10 +15,9 @@ pub(super) enum EntryFlag { Lowerbound, } -// TODO: Optimize size. Ideally 16 bytes or less. 
#[derive(Copy, Clone)] pub(super) struct Entry { - pub(super) hash: u64, + pub(super) high_hash: u32, pub(super) value: Evaluation, pub(super) depth: u8, pub(super) flag: EntryFlag, @@ -28,10 +27,13 @@ pub(super) struct Entry { #[test] fn test_entry_size() { - // TODO: ratchet down - assert!(std::mem::size_of::>() <= 24); - assert!(std::mem::size_of::>>() <= 32); - assert_eq!(std::mem::size_of::>(), 20); + assert!(std::mem::size_of::>() <= 16); + assert!(std::mem::size_of::>>() <= 20); + assert!(std::mem::size_of::>() <= 16); +} + +pub(super) fn high_bits(hash: u64) -> u32 { + (hash >> 32) as u32 } // A trait for a transposition table. The methods are mutual exclusion, but @@ -170,7 +172,7 @@ impl ShardedTable { let mut table = Vec::with_capacity(size); for _ in 0..size { table.push(Mutex::new(Entry:: { - hash: 0, + high_hash: 0, value: 0, depth: 0, flag: EntryFlag::Exact, @@ -187,7 +189,7 @@ impl Table for ShardedTable { let index = (hash as usize) & self.mask; for i in index..index + 2 { let entry = self.table[i].lock(); - if hash == entry.hash { + if high_bits(hash) == entry.high_hash { return Some(*entry); } } @@ -208,8 +210,14 @@ impl ConcurrentTable for ShardedTable { let table_gen = self.generation.load(Ordering::Relaxed); // index points to the first of a pair of entries, the depth-preferred entry and the always-replace entry. 
let index = (hash as usize) & self.mask; - let new_entry = - Entry { hash, value, depth, flag, generation: table_gen, best_move: Some(best_move) }; + let new_entry = Entry { + high_hash: high_bits(hash), + value, + depth, + flag, + generation: table_gen, + best_move: Some(best_move), + }; { let mut entry = self.table[index].lock(); if entry.generation != table_gen || entry.depth <= depth { @@ -244,7 +252,7 @@ impl RacyTable { let mut table = Vec::with_capacity(size); for _ in 0..size { table.push(Entry:: { - hash: 0, + high_hash: 0, value: 0, depth: 0, flag: EntryFlag::Exact, @@ -260,7 +268,7 @@ impl Table for RacyTable { fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = self.table[index]; - if hash == entry.hash { + if high_bits(hash) == entry.high_hash { return Some(entry); } None @@ -284,7 +292,7 @@ impl ConcurrentTable for RacyTable { #[allow(mutable_transmutes)] let ptr = unsafe { std::mem::transmute::<&Entry, &mut Entry>(entry) }; *ptr = Entry { - hash, + high_hash: high_bits(hash), value, depth, flag, @@ -321,11 +329,12 @@ impl Table for LockfreeTable { fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = &self.table[index]; - if (hash >> 32) as u32 == entry.high_hash.load(Ordering::SeqCst) { + let table_hash = entry.high_hash.load(Ordering::SeqCst); + if high_bits(hash) | 1 == table_hash | 1 { // Copy contents let ret = Some(Entry { // No one reads the hash. - hash: 0, + high_hash: 0, value: entry.value, depth: entry.depth, flag: entry.flag, @@ -333,7 +342,7 @@ impl Table for LockfreeTable { best_move: entry.best_move, }); // Verify the hash hasn't changed during the copy. 
- if (hash >> 32) as u32 == entry.high_hash.load(Ordering::SeqCst) { + if table_hash == entry.high_hash.load(Ordering::SeqCst) { return ret; } } @@ -386,18 +395,21 @@ impl ConcurrentTable for LockfreeTable { return; } // Try to set to sentinel value: - if entry.high_hash.compare_exchange_weak( - x, - Self::WRITING_SENTINEL, - Ordering::SeqCst, - Ordering::Relaxed, - ).is_err() { + if entry + .high_hash + .compare_exchange_weak( + x, + Self::WRITING_SENTINEL, + Ordering::SeqCst, + Ordering::Relaxed, + ) + .is_err() + { // Someone just started writing, just forget it. return; } // concurrent_lookup will throw out any read that occurs across a write. - // Unless it's a write of the same hash, but close enough. #[allow(mutable_transmutes)] let entry = unsafe { std::mem::transmute::<&ConcurrentEntry, &mut ConcurrentEntry>(entry) @@ -409,7 +421,14 @@ impl ConcurrentTable for LockfreeTable { entry.best_move = Some(best_move); // Set hash to correct value to indicate done. - entry.high_hash.store((hash >> 32) as u32, Ordering::SeqCst); + let new_hash = if high_bits(hash) | 1 == x | 1 { + // If we're overwriting the same hash, flip the lowest bit to + // catch any readers reading across this change. + x ^ 1 + } else { + high_bits(hash) + }; + entry.high_hash.store(new_hash, Ordering::SeqCst); } } diff --git a/src/util.rs b/src/util.rs index b465d46..20a69fe 100644 --- a/src/util.rs +++ b/src/util.rs @@ -50,7 +50,7 @@ impl Default for MovePool { impl MovePool { pub(crate) fn alloc(&mut self) -> Vec { - self.pool.pop().unwrap_or_else(Vec::new) + self.pool.pop().unwrap_or_default() } pub(crate) fn free(&mut self, mut vec: Vec) { From cede5515c6845f42b51f0573423d2f8fdc01678b Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 22 Apr 2022 15:01:56 -0700 Subject: [PATCH 063/162] Add MCTSOptions. 
--- src/lib.rs | 2 +- src/strategies/mcts.rs | 91 ++++++++++++++++++++++++++++-------------- tests/ttt.rs | 4 +- 3 files changed, 63 insertions(+), 34 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ffbbd3c..88d5e4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,7 @@ pub use interface::{ }; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; pub use strategies::lazy_smp::{LazySmp, LazySmpOptions}; -pub use strategies::mcts::MonteCarloTreeSearch; +pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch}; pub use strategies::negamax::Negamax; pub use strategies::random::Random; pub use strategies::ybw::{ParallelYbw, YbwOptions}; diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 011a743..e4bb62d 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -77,22 +77,56 @@ impl Node { let win_ratio = (score + visits) / (2.0 * visits); win_ratio + exploration_score * (2.0 * log_parent_visits / visits).sqrt() } + + fn update_stats(&self, result: i32) -> i32 { + self.visits.fetch_add(1, Ordering::SeqCst); + self.score.fetch_add(result, Ordering::SeqCst); + result + } +} + +/// Options for MonteCarloTreeSearch. +pub struct MCTSOptions { + max_rollout_depth: u32, + rollouts_before_expanding: u32, + // None means use num_cpus. + // TODO: num_threads: Option, + // TODO: rollout_policy +} + +impl Default for MCTSOptions { + fn default() -> Self { + Self { max_rollout_depth: 100, rollouts_before_expanding: 0 } + } +} + +impl MCTSOptions { + /// Set a maximum depth for rollouts. Rollouts that reach this depth are + /// stopped and assigned a Draw value. + pub fn with_max_rollout_depth(mut self, depth: u32) -> Self { + self.max_rollout_depth = depth; + self + } + + /// How many rollouts to run on a single leaf node before expanding its + /// children. The default value is 0, where every rollout expands some + /// leaf node. 
+ pub fn with_rollouts_before_expanding(mut self, rollouts: u32) -> Self { + self.rollouts_before_expanding = rollouts; + self + } } pub struct MonteCarloTreeSearch { // TODO: Evaluator - - // Config - max_rollout_depth: usize, + options: MCTSOptions, max_rollouts: u32, //max_time: Duration, - // TODO: rollouts_per_node - // TODO: num_threads } impl MonteCarloTreeSearch { - pub fn new() -> Self { - Self { max_rollout_depth: 200, max_rollouts: 100 } + pub fn new(options: MCTSOptions) -> Self { + Self { options, max_rollouts: 100 } } // Returns score for this node. +1 for win of original player to move. @@ -102,7 +136,7 @@ impl MonteCarloTreeSearch { G::S: Clone, { let mut rng = rand::thread_rng(); - let mut depth = self.max_rollout_depth; + let mut depth = self.options.max_rollout_depth; let mut state = s.clone(); let mut moves = Vec::new(); let mut sign = 1; @@ -134,31 +168,30 @@ impl MonteCarloTreeSearch { G::S: Clone, { if force_rollout { - let result = self.rollout::(state); - - // Backpropagate. - node.visits.fetch_add(1, Ordering::SeqCst); - node.score.fetch_add(result, Ordering::SeqCst); - return result; + return node.update_stats(self.rollout::(state)); } - let expansion = node.expansion.get().unwrap_or_else(|| { - // Expand this node, and force a rollout when we recurse. - force_rollout = true; - node.expansion.try_set(new_expansion::(state)) - }); + let expansion = match node.expansion.get() { + Some(expansion) => expansion, + None => { + // This is a leaf node. + if node.visits.load(Ordering::SeqCst) < self.options.rollouts_before_expanding { + // Just rollout from here. + return node.update_stats(self.rollout::(state)); + } else { + // Expand this node, and force a rollout when we recurse. 
+ force_rollout = true; + node.expansion.try_set(new_expansion::(state)) + } + } + }; if let Some(winner) = expansion.winner { - let result = match winner { + return node.update_stats(match winner { Winner::PlayerJustMoved => 1, Winner::PlayerToMove => -1, Winner::Draw => 0, - }; - - // Backpropagate. - node.visits.fetch_add(1, Ordering::SeqCst); - node.score.fetch_add(result, Ordering::SeqCst); - return result; + }); } // Recurse. @@ -167,11 +200,7 @@ impl MonteCarloTreeSearch { m.apply(state); let result = -self.simulate::(next, state, force_rollout); m.undo(state); - - // Backpropagate. - node.visits.fetch_add(1, Ordering::SeqCst); - node.score.fetch_add(result, Ordering::SeqCst); - result + node.update_stats(result) } } diff --git a/tests/ttt.rs b/tests/ttt.rs index db67f26..25148b2 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -4,7 +4,7 @@ extern crate minimax; mod ttt; use minimax::util::battle_royale; -use minimax::{MonteCarloTreeSearch, Negamax, Random}; +use minimax::{MCTSOptions, MonteCarloTreeSearch, Negamax, Random}; // Ensure that two players using negamax always results in a draw. #[test] @@ -29,7 +29,7 @@ fn test_ttt_negamax_vs_random_always_wins_or_draws() { #[test] fn test_ttt_mcts_vs_random_always_wins_or_draws() { - let mut s1 = MonteCarloTreeSearch::new(); + let mut s1 = MonteCarloTreeSearch::new(MCTSOptions::default()); let mut s2 = Random::new(); for _ in 0..100 { assert_ne!(battle_royale::(&mut s1, &mut s2), Some(1)); From fc27941ccc1ec5a7359800d87c5d409f093c1a32 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 22 Apr 2022 16:57:58 -0700 Subject: [PATCH 064/162] Multi-threaded MCTS. Using a very lazy implementation, starting up N threads with each move. 
--- src/strategies/mcts.rs | 71 +++++++++++++++++++++++++++++++----------- tests/ttt.rs | 2 +- 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index e4bb62d..c845c4d 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -3,7 +3,9 @@ use super::util::AtomicBox; use rand::seq::SliceRandom; use rand::Rng; +use std::sync::Arc; use std::sync::atomic::{AtomicI32, AtomicU32, Ordering}; +use std::thread::spawn; struct Node { // The Move to get from the parent to here. @@ -47,7 +49,10 @@ impl Node { // Choose best child based on UCT. fn best_child(&self, exploration_score: f32) -> Option<&Node> { - let log_visits = (self.visits.load(Ordering::SeqCst) as f32).log2(); + let mut log_visits = (self.visits.load(Ordering::SeqCst) as f32).log2(); + // Keep this numerator non-negative. + if log_visits < 0.0 { log_visits = 0.0; } + let expansion = self.expansion.get()?; // Find a node, randomly chosen among the best scores. // TODO: make it more uniformly random? @@ -86,17 +91,22 @@ impl Node { } /// Options for MonteCarloTreeSearch. +#[derive(Clone)] pub struct MCTSOptions { max_rollout_depth: u32, rollouts_before_expanding: u32, // None means use num_cpus. - // TODO: num_threads: Option, + num_threads: Option, // TODO: rollout_policy } impl Default for MCTSOptions { fn default() -> Self { - Self { max_rollout_depth: 100, rollouts_before_expanding: 0 } + Self { + max_rollout_depth: 100, + rollouts_before_expanding: 0, + num_threads: None, + } } } @@ -115,8 +125,17 @@ impl MCTSOptions { self.rollouts_before_expanding = rollouts; self } + + /// How many threads to run. Defaults to num_cpus. + pub fn with_num_threads(mut self, threads: u32) -> Self { + self.num_threads = Some(threads as usize); + self + } } +/// A strategy that uses random playouts to explore the game tree to decide on the best move. +/// This can be used without an Evaluator, just using the rules of the game. 
+#[derive(Clone)] pub struct MonteCarloTreeSearch { // TODO: Evaluator options: MCTSOptions, @@ -200,31 +219,47 @@ impl MonteCarloTreeSearch { m.apply(state); let result = -self.simulate::(next, state, force_rollout); m.undo(state); + + // Backpropagate. node.update_stats(result) } } impl Strategy for MonteCarloTreeSearch where - G::S: Clone, - G::M: Copy, + G::S: Clone + Send + 'static, + G::M: Copy + Send + Sync + 'static, { fn choose_move(&mut self, s: &G::S) -> Option { - let root = Node::::new(None); + let root = Arc::new(Node::::new(None)); root.expansion.try_set(new_expansion::(s)); - let mut state = s.clone(); - for _ in 0..self.max_rollouts { - self.simulate::(&root, &mut state, false); - } - debug_assert_eq!(self.max_rollouts, root.visits.load(Ordering::SeqCst)); + + let num_threads = self.options.num_threads.unwrap_or_else(num_cpus::get) as u32; + let num_rollouts = self.max_rollouts / num_threads; + + let threads = (1..num_threads).map(|_| { + let node = root.clone(); + let mut state = s.clone(); + let mcts = self.clone(); + spawn(move || { + for _ in 0..num_rollouts { + mcts.simulate::(&node, &mut state, false); + } + }) + }).collect::>(); + + let mut state = s.clone(); + let extra = self.max_rollouts - num_rollouts * num_threads; + for _ in 0..num_rollouts + extra { + self.simulate::(&root, &mut state, false); + } + + // Wait for threads. + for thread in threads { + thread.join().unwrap(); + } + let exploration = 0.0; // Just get best node. root.best_child(exploration).map(|node| node.m.unwrap()) } } - -mod tests { - // TODO: make a fake game with branching_factor=1 to test correct signage of results. 
- // TODO: make a game with branching_factor=2: add or subtract to shared total - - // or maybe just run tic tac toe against random many times and check that it always wins -} diff --git a/tests/ttt.rs b/tests/ttt.rs index 25148b2..c57451b 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -29,7 +29,7 @@ fn test_ttt_negamax_vs_random_always_wins_or_draws() { #[test] fn test_ttt_mcts_vs_random_always_wins_or_draws() { - let mut s1 = MonteCarloTreeSearch::new(MCTSOptions::default()); + let mut s1 = MonteCarloTreeSearch::new(MCTSOptions::default().with_num_threads(1)); let mut s2 = Random::new(); for _ in 0..100 { assert_ne!(battle_royale::(&mut s1, &mut s2), Some(1)); From f0c5bd95432ef3c0b3e98a9e351aef4abedc31c5 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 22 Apr 2022 19:46:38 -0700 Subject: [PATCH 065/162] Make max_rollouts configurable. Also cargo fmt previous changes. --- src/strategies/mcts.rs | 71 ++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index c845c4d..58e062b 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -3,8 +3,8 @@ use super::util::AtomicBox; use rand::seq::SliceRandom; use rand::Rng; -use std::sync::Arc; use std::sync::atomic::{AtomicI32, AtomicU32, Ordering}; +use std::sync::Arc; use std::thread::spawn; struct Node { @@ -49,9 +49,11 @@ impl Node { // Choose best child based on UCT. fn best_child(&self, exploration_score: f32) -> Option<&Node> { - let mut log_visits = (self.visits.load(Ordering::SeqCst) as f32).log2(); - // Keep this numerator non-negative. - if log_visits < 0.0 { log_visits = 0.0; } + let mut log_visits = (self.visits.load(Ordering::SeqCst) as f32).log2(); + // Keep this numerator non-negative. + if log_visits < 0.0 { + log_visits = 0.0; + } let expansion = self.expansion.get()?; // Find a node, randomly chosen among the best scores. 
@@ -102,11 +104,7 @@ pub struct MCTSOptions { impl Default for MCTSOptions { fn default() -> Self { - Self { - max_rollout_depth: 100, - rollouts_before_expanding: 0, - num_threads: None, - } + Self { max_rollout_depth: 100, rollouts_before_expanding: 0, num_threads: None } } } @@ -128,8 +126,8 @@ impl MCTSOptions { /// How many threads to run. Defaults to num_cpus. pub fn with_num_threads(mut self, threads: u32) -> Self { - self.num_threads = Some(threads as usize); - self + self.num_threads = Some(threads as usize); + self } } @@ -148,6 +146,11 @@ impl MonteCarloTreeSearch { Self { options, max_rollouts: 100 } } + /// If no time limit is set, runs this many rollouts in choose_move. + pub fn set_max_rollouts(rollouts: u32) { + self.max_rollouts = rollouts; + } + // Returns score for this node. +1 for win of original player to move. // TODO: policy options: random, look 1 ahead for winning moves, BYO Evaluator. fn rollout(&self, s: &G::S) -> i32 @@ -220,7 +223,7 @@ impl MonteCarloTreeSearch { let result = -self.simulate::(next, state, force_rollout); m.undo(state); - // Backpropagate. + // Backpropagate. 
node.update_stats(result) } } @@ -234,30 +237,32 @@ where let root = Arc::new(Node::::new(None)); root.expansion.try_set(new_expansion::(s)); - let num_threads = self.options.num_threads.unwrap_or_else(num_cpus::get) as u32; - let num_rollouts = self.max_rollouts / num_threads; + let num_threads = self.options.num_threads.unwrap_or_else(num_cpus::get) as u32; + let num_rollouts = self.max_rollouts / num_threads; - let threads = (1..num_threads).map(|_| { - let node = root.clone(); - let mut state = s.clone(); - let mcts = self.clone(); - spawn(move || { - for _ in 0..num_rollouts { - mcts.simulate::(&node, &mut state, false); - } - }) - }).collect::>(); + let threads = (1..num_threads) + .map(|_| { + let node = root.clone(); + let mut state = s.clone(); + let mcts = self.clone(); + spawn(move || { + for _ in 0..num_rollouts { + mcts.simulate::(&node, &mut state, false); + } + }) + }) + .collect::>(); - let mut state = s.clone(); - let extra = self.max_rollouts - num_rollouts * num_threads; - for _ in 0..num_rollouts + extra { - self.simulate::(&root, &mut state, false); - } + let mut state = s.clone(); + let extra = self.max_rollouts - num_rollouts * num_threads; + for _ in 0..num_rollouts + extra { + self.simulate::(&root, &mut state, false); + } - // Wait for threads. - for thread in threads { - thread.join().unwrap(); - } + // Wait for threads. + for thread in threads { + thread.join().unwrap(); + } let exploration = 0.0; // Just get best node. root.best_child(exploration).map(|node| node.m.unwrap()) From 707e58a338c15292518ce8c8509c1c2060f9ef60 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 22 Apr 2022 19:48:43 -0700 Subject: [PATCH 066/162] Fix build. 
--- src/strategies/mcts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 58e062b..c627b1f 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -147,7 +147,7 @@ impl MonteCarloTreeSearch { } /// If no time limit is set, runs this many rollouts in choose_move. - pub fn set_max_rollouts(rollouts: u32) { + pub fn set_max_rollouts(&mut self, rollouts: u32) { self.max_rollouts = rollouts; } From 4e5ce080702a0bc71a5a44ad4a644b5668f78732 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 22 Apr 2022 20:13:39 -0700 Subject: [PATCH 067/162] Generate the right moves in rollout. Not sure how this always beat Random... --- src/strategies/mcts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index c627b1f..44be7d6 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -176,7 +176,7 @@ impl MonteCarloTreeSearch { } moves.clear(); - G::generate_moves(s, &mut moves); + G::generate_moves(&state, &mut moves); let m = moves.choose(&mut rng).unwrap(); m.apply(&mut state); sign = -sign; From 6ab10634e5f121630757eb6ac3e7ff46bfb1fe21 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 23 Apr 2022 16:50:07 -0700 Subject: [PATCH 068/162] Add low-effort static move reordering based on Evaluators. --- src/interface.rs | 23 +++++++++++++++++++++++ src/strategies/iterative.rs | 32 ++++++++++++++++++++++++++++---- src/strategies/lazy_smp.rs | 11 +++++++++-- 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index 3902820..d1f502d 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -21,6 +21,29 @@ pub trait Evaluator { /// Evaluate the non-terminal state from the persective of the player to /// move next. fn evaluate(&self, s: &::S) -> Evaluation; + + /// After generating moves, reorder them to explore the most promising first. 
+ /// The default implementation evaluates all thes game states and sorts highest Evaluation first. + fn reorder_moves(&self, s: &mut ::S, moves: &mut [::M]) + where + ::M: Copy, + { + let mut evals = Vec::with_capacity(moves.len()); + for &m in moves.iter() { + m.apply(s); + let eval = if let Some(winner) = Self::G::get_winner(s) { + -winner.evaluate() + } else { + -self.evaluate(s) + }; + evals.push((eval, m)); + m.undo(s); + } + evals.sort_by_key(|eval| eval.0); + for (m, eval) in moves.iter_mut().zip(evals) { + *m = eval.1; + } + } } /// Defines how a move affects the game state. diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 903b258..4b0b37e 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -118,6 +118,7 @@ pub struct IterativeOptions { aspiration_window: Option, step_increment: u8, max_quiescence_depth: u8, + min_reorder_moves_depth: u8, } impl IterativeOptions { @@ -129,6 +130,7 @@ impl IterativeOptions { aspiration_window: None, step_increment: 1, max_quiescence_depth: 0, + min_reorder_moves_depth: u8::MAX, } } } @@ -182,6 +184,15 @@ impl IterativeOptions { self.max_quiescence_depth = depth; self } + + /// Enable the Evaluator's move reordering after generating moves for all + /// nodes at this depth or higher. Reordering can be an expensive + /// operation, but it could cut off a lot of nodes if done well high in + /// the search tree. 
+ pub fn with_min_reorder_moves_depth(mut self, depth: u8) -> Self { + self.min_reorder_moves_depth = depth; + self + } } pub(super) struct Negamaxer { @@ -193,6 +204,7 @@ pub(super) struct Negamaxer { // Config max_quiescence_depth: u8, null_window_search: bool, + min_reorder_moves_depth: u8, // Stats nodes_explored: u64, @@ -207,6 +219,7 @@ where { pub(super) fn new( table: T, eval: E, max_quiescence_depth: u8, null_window_search: bool, + min_reorder_moves_depth: u8, ) -> Self { Self { timeout: Arc::new(AtomicBool::new(false)), @@ -215,6 +228,7 @@ where move_pool: MovePool::default(), max_quiescence_depth, null_window_search, + min_reorder_moves_depth, nodes_explored: 0, total_generate_move_calls: 0, total_generated_moves: 0, @@ -303,11 +317,16 @@ where self.move_pool.free(moves); return Some(WORST_EVAL); } + + // Reorder moves. + if depth >= self.min_reorder_moves_depth { + self.eval.reorder_moves(s, &mut moves); + } if let Some(good) = good_move { - // Rearrange so predicted good move is first. + // Move predicted good move to the front. 
for i in 0..moves.len() { if moves[i] == good { - moves.swap(0, i); + moves[0..i + 1].rotate_right(1); break; } } @@ -391,8 +410,13 @@ where { pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); - let negamaxer = - Negamaxer::new(table, eval, opts.max_quiescence_depth, opts.null_window_search); + let negamaxer = Negamaxer::new( + table, + eval, + opts.max_quiescence_depth, + opts.null_window_search, + opts.min_reorder_moves_depth, + ); IterativeSearch { max_depth: 100, max_time: Duration::from_secs(5), diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index a708e26..be2dec6 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -221,14 +221,21 @@ where let waiter = signal.clone(); spawn(move || { let mut helper = Helper { - negamaxer: Negamaxer::new(table2, eval2, opts.max_quiescence_depth, true), + negamaxer: Negamaxer::new( + table2, + eval2, + opts.max_quiescence_depth, + true, + u8::MAX, + ), command: command2, waiter, }; helper.process(); }); } - let negamaxer = Negamaxer::new(table.clone(), eval, opts.max_quiescence_depth, true); + let negamaxer = + Negamaxer::new(table.clone(), eval, opts.max_quiescence_depth, true, u8::MAX); LazySmp { max_depth: 100, max_time: Duration::from_secs(5), From c66f5bda7a668fdd068d22137a9f83caa7086fed Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 2 May 2022 22:16:23 -0700 Subject: [PATCH 069/162] Add stats to LazySMP. The stats are probably undercounting, as nothing waits for the helper threads to update stats before reading them. 
--- src/strategies/iterative.rs | 13 ++--- src/strategies/lazy_smp.rs | 106 +++++++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 21 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 4b0b37e..e84fcd5 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -207,9 +207,9 @@ pub(super) struct Negamaxer { min_reorder_moves_depth: u8, // Stats - nodes_explored: u64, - total_generate_move_calls: u64, - total_generated_moves: u64, + pub(crate) nodes_explored: u64, + pub(crate) total_generate_move_calls: u64, + pub(crate) total_generated_moves: u64, } impl::M>> Negamaxer @@ -398,7 +398,6 @@ pub struct IterativeSearch { actual_depth: u8, // Nodes explored at each depth. nodes_explored: Vec, - table_hits: usize, pv: Vec<::M>, wall_time: Duration, } @@ -425,7 +424,6 @@ where opts, actual_depth: 0, nodes_explored: Vec::new(), - table_hits: 0, pv: Vec::new(), wall_time: Duration::default(), } @@ -455,9 +453,9 @@ where .powf((self.actual_depth as f64 + 1.0).recip()); let throughput = (total_nodes_explored + self.negamaxer.nodes_explored) as f64 / self.wall_time.as_secs_f64(); - format!("Explored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} transposition table hits.\n{} nodes/sec", + format!("Explored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, - self.negamaxer.nodes_explored, self.table_hits, throughput as usize) + self.negamaxer.nodes_explored, throughput as usize) } #[doc(hidden)] @@ -483,7 +481,6 @@ where self.nodes_explored.clear(); self.negamaxer.reset_stats(); self.actual_depth = 0; - self.table_hits = 0; let start_time = Instant::now(); // Start timer if configured. 
self.negamaxer.set_timeout(if self.max_time == Duration::new(0, 0) { diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index be2dec6..ec01129 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -15,7 +15,7 @@ use super::util::*; use rand::seq::SliceRandom; use std::cmp::max; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Condvar, Mutex}; use std::thread::spawn; use std::time::{Duration, Instant}; @@ -27,6 +27,7 @@ pub struct LazySmpOptions { step_increment: u8, max_quiescence_depth: u8, aspiration_window: Option, + null_window_search: bool, // Default is one per core. num_threads: Option, // TODO: optional bonus thread local TT? @@ -42,6 +43,7 @@ impl LazySmpOptions { max_quiescence_depth: 0, aspiration_window: None, num_threads: None, + null_window_search: false, } } } @@ -52,6 +54,7 @@ impl Default for LazySmpOptions { } } +// TODO: extend this from IterativeOptions to share common fields. impl LazySmpOptions { /// Approximately how large the transposition table should be in memory. pub fn with_table_byte_size(mut self, size: usize) -> Self { @@ -74,17 +77,26 @@ impl LazySmpOptions { self } - pub fn with_num_threads(mut self, num_threads: usize) -> Self { - self.num_threads = Some(num_threads); - self - } - /// Whether to search first in a narrow window around the previous root /// value on each iteration. pub fn with_aspiration_window(mut self, window: Evaluation) -> Self { self.aspiration_window = Some(window); self } + + /// Whether to add null-window searches to try to prune branches that are + /// probably worse than those already found. Also known as principal + /// variation search. + pub fn with_null_window_search(mut self, null: bool) -> Self { + self.null_window_search = null; + self + } + + /// Set the total number of threads to use. Otherwise defaults to num_cpus. 
+ pub fn with_num_threads(mut self, num_threads: usize) -> Self { + self.num_threads = Some(num_threads); + self + } } #[derive(Clone)] @@ -101,6 +113,41 @@ enum Command { Search(Search), } +struct SharedStats { + nodes_explored: AtomicU64, + generated_moves: AtomicU64, + generate_move_calls: AtomicU64, +} + +impl SharedStats { + fn new() -> Self { + Self { + nodes_explored: AtomicU64::new(0), + generated_moves: AtomicU64::new(0), + generate_move_calls: AtomicU64::new(0), + } + } + + fn reset(&self) { + self.nodes_explored.store(0, Ordering::SeqCst); + self.generated_moves.store(0, Ordering::SeqCst); + self.generate_move_calls.store(0, Ordering::SeqCst); + } + + fn update(&self, negamaxer: &mut Negamaxer) { + self.nodes_explored.fetch_add(negamaxer.nodes_explored, Ordering::SeqCst); + negamaxer.nodes_explored = 0; + self.generated_moves.fetch_add(negamaxer.total_generated_moves, Ordering::SeqCst); + negamaxer.total_generated_moves = 0; + self.generate_move_calls.fetch_add(negamaxer.total_generate_move_calls, Ordering::SeqCst); + negamaxer.total_generate_move_calls = 0; + } + + fn reset_nodes_explored(&self) -> u64 { + self.nodes_explored.swap(0, Ordering::SeqCst) + } +} + struct Helper where ::S: Clone, @@ -109,6 +156,7 @@ where negamaxer: Negamaxer::M>>>, command: Arc::S>>>, waiter: Arc, + stats: Arc, } impl Helper @@ -117,17 +165,31 @@ where ::M: Copy + Eq, { fn process(&mut self) { + let mut prev_hash: u64 = 0; + let mut prev_depth: u8 = 200; loop { let mut search = { let command = self.command.lock().unwrap(); - let command = - self.waiter.wait_while(command, |c| matches!(*c, Command::Wait)).unwrap(); + // Stay waiting during Wait command or if we already completed Search command. + let command = self + .waiter + .wait_while(command, |c| match *c { + Command::Exit => false, + Command::Wait => true, + Command::Search(ref search) => { + search.state.zobrist_hash() == prev_hash && search.depth == prev_depth + } + }) + .unwrap(); + // Do command. 
match *command { Command::Exit => return, Command::Wait => continue, Command::Search(ref search) => search.clone(), } }; + prev_hash = search.state.zobrist_hash(); + prev_depth = search.depth; self.negamaxer.set_timeout(search.timeout.clone()); let mut alpha = WORST_EVAL; @@ -161,6 +223,7 @@ where } // Computation finished or interrupted, go back to sleep. + self.stats.update(&mut self.negamaxer); } } } @@ -185,6 +248,7 @@ where actual_depth: u8, // Nodes explored at each depth. nodes_explored: Vec, + shared_stats: Arc, pv: Vec<::M>, wall_time: Duration, } @@ -213,23 +277,26 @@ where let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); let command = Arc::new(Mutex::new(Command::Wait)); let signal = Arc::new(Condvar::new()); + let stats = Arc::new(SharedStats::new()); // start n-1 helper threads for _ in 1..opts.num_threads.unwrap_or_else(num_cpus::get) { let table2 = table.clone(); let eval2 = eval.clone(); let command2 = command.clone(); let waiter = signal.clone(); + let stats2 = stats.clone(); spawn(move || { let mut helper = Helper { negamaxer: Negamaxer::new( table2, eval2, opts.max_quiescence_depth, - true, + opts.null_window_search, u8::MAX, ), command: command2, waiter, + stats: stats2, }; helper.process(); }); @@ -247,6 +314,7 @@ where opts, actual_depth: 0, nodes_explored: Vec::new(), + shared_stats: stats, pv: Vec::new(), wall_time: Duration::default(), } @@ -267,10 +335,19 @@ where self.max_depth = 100; } - // TODO: gather stats from helper threads. - // Return a human-readable summary of the last move generation. - //pub fn stats(&self) -> String { - //} + /// Return a human-readable summary of the last move generation. 
+ pub fn stats(&self) -> String { + let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); + let mean_branching_factor = self.shared_stats.generated_moves.load(Ordering::SeqCst) as f64 + / self.shared_stats.generate_move_calls.load(Ordering::SeqCst) as f64; + let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) + .powf((self.actual_depth as f64 + 1.0).recip()); + let throughput = (total_nodes_explored + self.negamaxer.nodes_explored) as f64 + / self.wall_time.as_secs_f64(); + format!("Explored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", + total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, + self.negamaxer.nodes_explored, throughput as usize) + } #[doc(hidden)] pub fn root_value(&self) -> Evaluation { @@ -292,6 +369,7 @@ where fn choose_move(&mut self, s: &::S) -> Option<::M> { self.table.concurrent_advance_generation(); // Reset stats. + self.shared_stats.reset(); self.nodes_explored.clear(); self.actual_depth = 0; let start_time = Instant::now(); @@ -351,6 +429,8 @@ where self.prev_value = entry.value; depth += self.opts.step_increment; self.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); + self.shared_stats.update(&mut self.negamaxer); + self.nodes_explored.push(self.shared_stats.reset_nodes_explored()); } self.wall_time = start_time.elapsed(); best_move From 33f506a938445b137c158e00beeb5998fe1c37c6 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 3 May 2022 11:07:04 -0700 Subject: [PATCH 070/162] Consolidate duplicate options into reusing IterativeOptions. 
--- benches/negamax.rs | 3 +- examples/connect4.rs | 19 ++++------ src/strategies/iterative.rs | 41 ++++++++------------- src/strategies/lazy_smp.rs | 71 +++++-------------------------------- src/strategies/util.rs | 2 +- src/strategies/ybw.rs | 64 ++++++--------------------------- tests/strategies.rs | 30 ++++++---------- 7 files changed, 51 insertions(+), 179 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index 67a5d99..ab80cb5 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -34,7 +34,8 @@ fn bench_parallel(b: &mut Bencher) { b.iter(|| { let mut s = ParallelYbw::new( connect4::BasicEvaluator::default(), - YbwOptions::new().with_table_byte_size(32_000), + IterativeOptions::new().with_table_byte_size(32_000), + YbwOptions::new(), ); s.set_max_depth(5); let m = s.choose_move(&board); diff --git a/examples/connect4.rs b/examples/connect4.rs index 3784134..8a8e07b 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -266,21 +266,14 @@ fn main() { ); dumb.set_max_depth(8); - let opts = IterativeOptions::new() - .with_table_byte_size(64_000_000) - .with_double_step_increment() - .with_aspiration_window(5); - let mut iterative = IterativeSearch::new(BasicEvaluator::default(), opts); + let opts = + IterativeOptions::new().with_table_byte_size(64_000_000).with_double_step_increment(); + let mut iterative = + IterativeSearch::new(BasicEvaluator::default(), opts.clone().with_aspiration_window(5)); iterative.set_max_depth(12); - let mut parallelybw = ParallelYbw::new( - BasicEvaluator::default(), - YbwOptions::new().with_table_byte_size(64_000_000).with_double_step_increment(), - ); + let mut parallelybw = ParallelYbw::new(BasicEvaluator::default(), opts, YbwOptions::new()); parallelybw.set_max_depth(12); - let mut lazysmp = LazySmp::new( - BasicEvaluator::default(), - LazySmpOptions::new().with_table_byte_size(64_000_000).with_double_step_increment(), - ); + let mut lazysmp = LazySmp::new(BasicEvaluator::default(), opts, 
LazySmpOptions::new()); lazysmp.set_max_depth(12); let mut strategies: [&mut dyn Strategy; 4] = diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index e84fcd5..35d77f3 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -112,13 +112,13 @@ impl Table for TranspositionTable { /// Options to use for the iterative search engine. #[derive(Clone, Copy)] pub struct IterativeOptions { - table_byte_size: usize, - strategy: Replacement, - null_window_search: bool, - aspiration_window: Option, - step_increment: u8, - max_quiescence_depth: u8, - min_reorder_moves_depth: u8, + pub(super) table_byte_size: usize, + pub(super) strategy: Replacement, + pub(super) null_window_search: bool, + pub(super) aspiration_window: Option, + pub(super) step_increment: u8, + pub(super) max_quiescence_depth: u8, + pub(super) min_reorder_moves_depth: u8, } impl IterativeOptions { @@ -202,9 +202,7 @@ pub(super) struct Negamaxer { eval: E, // Config - max_quiescence_depth: u8, - null_window_search: bool, - min_reorder_moves_depth: u8, + opts: IterativeOptions, // Stats pub(crate) nodes_explored: u64, @@ -217,18 +215,13 @@ where ::S: Zobrist, ::M: Copy + Eq, { - pub(super) fn new( - table: T, eval: E, max_quiescence_depth: u8, null_window_search: bool, - min_reorder_moves_depth: u8, - ) -> Self { + pub(super) fn new(table: T, eval: E, opts: IterativeOptions) -> Self { Self { timeout: Arc::new(AtomicBool::new(false)), table, eval, move_pool: MovePool::default(), - max_quiescence_depth, - null_window_search, - min_reorder_moves_depth, + opts, nodes_explored: 0, total_generate_move_calls: 0, total_generated_moves: 0, @@ -296,7 +289,7 @@ where if depth == 0 { // Evaluate quiescence search on leaf nodes. // Will just return the node's evaluation if quiescence search is disabled. 
- return self.noisy_negamax(s, self.max_quiescence_depth, alpha, beta); + return self.noisy_negamax(s, self.opts.max_quiescence_depth, alpha, beta); } if let Some(winner) = E::G::get_winner(s) { return Some(winner.evaluate()); @@ -319,7 +312,7 @@ where } // Reorder moves. - if depth >= self.min_reorder_moves_depth { + if depth >= self.opts.min_reorder_moves_depth { self.eval.reorder_moves(s, &mut moves); } if let Some(good) = good_move { @@ -357,7 +350,7 @@ where alpha = value; // Now that we've found a good move, assume following moves // are worse, and seek to cull them without full evaluation. - null_window = self.null_window_search; + null_window = self.opts.null_window_search; } if alpha >= beta { break; @@ -409,13 +402,7 @@ where { pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); - let negamaxer = Negamaxer::new( - table, - eval, - opts.max_quiescence_depth, - opts.null_window_search, - opts.min_reorder_moves_depth, - ); + let negamaxer = Negamaxer::new(table, eval, opts.clone()); IterativeSearch { max_depth: 100, max_time: Duration::from_secs(5), diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index ec01129..2a37c70 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -9,7 +9,7 @@ extern crate num_cpus; extern crate rand; use super::super::interface::*; -use super::iterative::Negamaxer; +use super::iterative::{IterativeOptions, Negamaxer}; use super::table::*; use super::util::*; @@ -23,11 +23,6 @@ use std::time::{Duration, Instant}; /// Options to use for the iterative search engine. #[derive(Clone, Copy)] pub struct LazySmpOptions { - table_byte_size: usize, - step_increment: u8, - max_quiescence_depth: u8, - aspiration_window: Option, - null_window_search: bool, // Default is one per core. num_threads: Option, // TODO: optional bonus thread local TT? 
@@ -37,14 +32,7 @@ pub struct LazySmpOptions { impl LazySmpOptions { pub fn new() -> Self { - LazySmpOptions { - table_byte_size: 32_000_000, - step_increment: 1, - max_quiescence_depth: 0, - aspiration_window: None, - num_threads: None, - null_window_search: false, - } + LazySmpOptions { num_threads: None } } } @@ -54,44 +42,7 @@ impl Default for LazySmpOptions { } } -// TODO: extend this from IterativeOptions to share common fields. impl LazySmpOptions { - /// Approximately how large the transposition table should be in memory. - pub fn with_table_byte_size(mut self, size: usize) -> Self { - self.table_byte_size = size; - self - } - - /// Increment the depth by two between iterations. - pub fn with_double_step_increment(mut self) -> Self { - self.step_increment = 2; - self - } - - /// Enable [quiescence - /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves - /// of the search tree. The Game must implement `generate_noisy_moves` - /// for the search to know when the state has become "quiet". - pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { - self.max_quiescence_depth = depth; - self - } - - /// Whether to search first in a narrow window around the previous root - /// value on each iteration. - pub fn with_aspiration_window(mut self, window: Evaluation) -> Self { - self.aspiration_window = Some(window); - self - } - - /// Whether to add null-window searches to try to prune branches that are - /// probably worse than those already found. Also known as principal - /// variation search. - pub fn with_null_window_search(mut self, null: bool) -> Self { - self.null_window_search = null; - self - } - /// Set the total number of threads to use. Otherwise defaults to num_cpus. pub fn with_num_threads(mut self, num_threads: usize) -> Self { self.num_threads = Some(num_threads); @@ -240,7 +191,7 @@ where command: Arc::S>>>, signal: Arc, - opts: LazySmpOptions, + opts: IterativeOptions, // Runtime stats for the last move generated. 
prev_value: Evaluation, @@ -270,7 +221,7 @@ where ::M: Copy + Eq + Send, E: Clone + Send, { - pub fn new(eval: E, opts: LazySmpOptions) -> LazySmp + pub fn new(eval: E, opts: IterativeOptions, smp_opts: LazySmpOptions) -> LazySmp where E: 'static, { @@ -279,21 +230,16 @@ where let signal = Arc::new(Condvar::new()); let stats = Arc::new(SharedStats::new()); // start n-1 helper threads - for _ in 1..opts.num_threads.unwrap_or_else(num_cpus::get) { + for _ in 1..smp_opts.num_threads.unwrap_or_else(num_cpus::get) { let table2 = table.clone(); let eval2 = eval.clone(); + let opts2 = opts.clone(); let command2 = command.clone(); let waiter = signal.clone(); let stats2 = stats.clone(); spawn(move || { let mut helper = Helper { - negamaxer: Negamaxer::new( - table2, - eval2, - opts.max_quiescence_depth, - opts.null_window_search, - u8::MAX, - ), + negamaxer: Negamaxer::new(table2, eval2, opts2), command: command2, waiter, stats: stats2, @@ -301,8 +247,7 @@ where helper.process(); }); } - let negamaxer = - Negamaxer::new(table.clone(), eval, opts.max_quiescence_depth, true, u8::MAX); + let negamaxer = Negamaxer::new(table.clone(), eval, opts.clone()); LazySmp { max_depth: 100, max_time: Duration::from_secs(5), diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 18ca56f..0092dd1 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -41,7 +41,7 @@ pub(super) fn timeout_signal(dur: Duration) -> Arc { signal } -// This exists to be wrapped in a mutex, because it didn't work when I tried a tuple.' +// This exists to be wrapped in a mutex, because it didn't work when I tried a tuple. 
pub(super) struct ValueMove { value: Evaluation, m: M, diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index ad76e8b..f7b35c9 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -8,6 +8,7 @@ extern crate rayon; use super::super::interface::*; +use super::iterative::IterativeOptions; use super::table::*; use super::util::*; @@ -20,22 +21,12 @@ use std::time::{Duration, Instant}; /// Options to use for the parallel search engine. #[derive(Clone, Copy)] pub struct YbwOptions { - table_byte_size: usize, - null_window_search: bool, - step_increment: u8, - max_quiescence_depth: u8, serial_cutoff_depth: u8, } impl YbwOptions { pub fn new() -> Self { - YbwOptions { - table_byte_size: 32_000_000, - null_window_search: true, - step_increment: 1, - max_quiescence_depth: 0, - serial_cutoff_depth: 1, - } + YbwOptions { serial_cutoff_depth: 1 } } } @@ -46,32 +37,9 @@ impl Default for YbwOptions { } impl YbwOptions { - /// Approximately how large the transposition table should be in memory. - pub fn with_table_byte_size(mut self, size: usize) -> Self { - self.table_byte_size = size; - self - } - - /// Whether to add null-window searches to try to prune branches that are - /// probably worse than those already found. Also known as principal - /// variation search. - pub fn with_null_window_search(mut self, null: bool) -> Self { - self.null_window_search = null; - self - } - - /// Increment the depth by two between iterations. - pub fn with_double_step_increment(mut self) -> Self { - self.step_increment = 2; - self - } - - /// Enable [quiescence - /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves - /// of the search tree. The Game must implement `generate_noisy_moves` - /// for the search to know when the state has become "quiet". - pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { - self.max_quiescence_depth = depth; + /// At what depth should we stop trying to parallelize and just run serially. 
+ pub fn with_serial_cutoff_depth(mut self, depth: u8) -> Self { + self.serial_cutoff_depth = depth; self } } @@ -85,7 +53,8 @@ pub struct ParallelYbw { prev_value: Evaluation, eval: E, - opts: YbwOptions, + opts: IterativeOptions, + ybw_opts: YbwOptions, // Runtime stats for the last move generated. @@ -105,7 +74,7 @@ pub struct ParallelYbw { } impl ParallelYbw { - pub fn new(eval: E, opts: YbwOptions) -> ParallelYbw { + pub fn new(eval: E, opts: IterativeOptions, ybw_opts: YbwOptions) -> ParallelYbw { let table = LockfreeTable::new(opts.table_byte_size); ParallelYbw { max_depth: 100, @@ -115,6 +84,7 @@ impl ParallelYbw { //move_pool: MovePool::<_>::default(), prev_value: 0, opts, + ybw_opts, eval, actual_depth: 0, nodes_explored: Vec::new(), @@ -142,20 +112,6 @@ impl ParallelYbw { self.max_depth = 100; } - /// Return a human-readable summary of the last move generation. - pub fn stats(&self) -> String { - let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); - let mean_branching_factor = - self.total_generated_moves as f64 / self.total_generate_move_calls as f64; - let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) - .powf((self.actual_depth as f64 + 1.0).recip()); - let throughput = - (total_nodes_explored + self.next_depth_nodes) as f64 / self.wall_time.as_secs_f64(); - format!("Explored {} nodes to depth {}. 
MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} transposition table hits.\n{} nodes/sec", - total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, - self.next_depth_nodes, self.table_hits, throughput as usize) - } - #[doc(hidden)] pub fn root_value(&self) -> Evaluation { unclamp_value(self.prev_value) @@ -258,7 +214,7 @@ impl ParallelYbw { let (best, best_move) = if alpha >= beta { // Skip search (initial_value, first_move) - } else if self.opts.serial_cutoff_depth >= depth { + } else if self.ybw_opts.serial_cutoff_depth >= depth { // Serial search let mut best = initial_value; let mut best_move = first_move; diff --git a/tests/strategies.rs b/tests/strategies.rs index 9e9c540..2810b92 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -176,10 +176,8 @@ fn compare_plain_negamax() { ); } - let mut ybw = ParallelYbw::new( - RandomEvaluator::default(), - YbwOptions::default().with_table_byte_size(64000), - ); + let opt = IterativeOptions::new().with_table_byte_size(64000); + let mut ybw = ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); ybw.set_max_depth(max_depth); let ybw_move = ybw.choose_move(&b).unwrap(); let ybw_value = ybw.root_value(); @@ -192,10 +190,8 @@ fn compare_plain_negamax() { b ); - let mut lazysmp = LazySmp::new( - RandomEvaluator::default(), - LazySmpOptions::default().with_table_byte_size(64000), - ); + let mut lazysmp = + LazySmp::new(RandomEvaluator::default(), opt, LazySmpOptions::default()); lazysmp.set_max_depth(max_depth); let lazysmp_move = lazysmp.choose_move(&b).unwrap(); let lazysmp_value = lazysmp.root_value(); @@ -215,6 +211,7 @@ fn compare_plain_negamax() { // more parallelism in the parallel strategies. 
#[test] fn compare_deep_negamax() { + let opt = IterativeOptions::new().with_table_byte_size(64000); for _ in 0..10 { for max_depth in 0..9 { let b = generate_random_state(10); @@ -223,28 +220,21 @@ fn compare_deep_negamax() { negamax.choose_move(&b).unwrap(); let value = negamax.root_value(); - let mut iterative = IterativeSearch::new( - RandomEvaluator::default(), - IterativeOptions::new().with_table_byte_size(64000), - ); + let mut iterative = IterativeSearch::new(RandomEvaluator::default(), opt); iterative.set_max_depth(max_depth); iterative.choose_move(&b).unwrap(); let iterative_value = iterative.root_value(); assert_eq!(value, iterative_value, "search depth={}\n{}", max_depth, b); - let mut parallel = ParallelYbw::new( - RandomEvaluator::default(), - YbwOptions::default().with_table_byte_size(64000), - ); + let mut parallel = + ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); parallel.set_max_depth(max_depth); parallel.choose_move(&b).unwrap(); let parallel_value = parallel.root_value(); assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); - let mut lazysmp = LazySmp::new( - RandomEvaluator::default(), - LazySmpOptions::default().with_table_byte_size(64000), - ); + let mut lazysmp = + LazySmp::new(RandomEvaluator::default(), opt, LazySmpOptions::default()); lazysmp.set_max_depth(max_depth); lazysmp.choose_move(&b).unwrap(); let lazysmp_value = lazysmp.root_value(); From 66a496f804f3b92d71c9d8972137d9a263efba6b Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 3 May 2022 11:27:15 -0700 Subject: [PATCH 071/162] Add timeout option to MCTS. 
--- src/strategies/mcts.rs | 63 ++++++++++++++++++++++++++++++++---------- tests/ttt.rs | 1 + 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 44be7d6..01c4afb 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,11 +1,12 @@ use super::super::interface::*; -use super::util::AtomicBox; +use super::util::{timeout_signal, AtomicBox}; use rand::seq::SliceRandom; use rand::Rng; -use std::sync::atomic::{AtomicI32, AtomicU32, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32, Ordering}; use std::sync::Arc; use std::thread::spawn; +use std::time::Duration; struct Node { // The Move to get from the parent to here. @@ -85,10 +86,11 @@ impl Node { win_ratio + exploration_score * (2.0 * log_parent_visits / visits).sqrt() } - fn update_stats(&self, result: i32) -> i32 { + fn update_stats(&self, result: i32) -> Option { self.visits.fetch_add(1, Ordering::SeqCst); self.score.fetch_add(result, Ordering::SeqCst); - result + // Always return Some, as we aren't timed out. + Some(result) } } @@ -138,16 +140,29 @@ pub struct MonteCarloTreeSearch { // TODO: Evaluator options: MCTSOptions, max_rollouts: u32, - //max_time: Duration, + max_time: Duration, + timeout: Arc, } impl MonteCarloTreeSearch { pub fn new(options: MCTSOptions) -> Self { - Self { options, max_rollouts: 100 } + Self { + options, + max_rollouts: 0, + max_time: Duration::from_secs(5), + timeout: Arc::new(AtomicBool::new(false)), + } + } + + /// Set the time limit per move. + pub fn set_timeout(&mut self, timeout: Duration) { + self.max_rollouts = 0; + self.max_time = timeout; } - /// If no time limit is set, runs this many rollouts in choose_move. + /// Instead of a timeout, run this many rollouts to choose a move. 
pub fn set_max_rollouts(&mut self, rollouts: u32) { + self.max_time = Duration::default(); self.max_rollouts = rollouts; } @@ -185,10 +200,15 @@ impl MonteCarloTreeSearch { } // Explore the tree, make a new node, rollout, backpropagate. - fn simulate(&self, node: &Node, state: &mut G::S, mut force_rollout: bool) -> i32 + fn simulate( + &self, node: &Node, state: &mut G::S, mut force_rollout: bool, + ) -> Option where G::S: Clone, { + if self.timeout.load(Ordering::Relaxed) { + return None; + } if force_rollout { return node.update_stats(self.rollout::(state)); } @@ -220,7 +240,7 @@ impl MonteCarloTreeSearch { let next = node.best_child(1.).unwrap(); let m = next.m.as_ref().unwrap(); m.apply(state); - let result = -self.simulate::(next, state, force_rollout); + let result = -self.simulate::(next, state, force_rollout)?; m.undo(state); // Backpropagate. @@ -238,7 +258,17 @@ where root.expansion.try_set(new_expansion::(s)); let num_threads = self.options.num_threads.unwrap_or_else(num_cpus::get) as u32; - let num_rollouts = self.max_rollouts / num_threads; + let (rollouts_per_thread, extra) = if self.max_rollouts == 0 { + (u32::MAX, 0) + } else { + let rollouts_per_thread = self.max_rollouts / num_threads; + (rollouts_per_thread, self.max_rollouts - rollouts_per_thread * num_threads) + }; + self.timeout = if self.max_time == Duration::default() { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(self.max_time) + }; let threads = (1..num_threads) .map(|_| { @@ -246,17 +276,20 @@ where let mut state = s.clone(); let mcts = self.clone(); spawn(move || { - for _ in 0..num_rollouts { - mcts.simulate::(&node, &mut state, false); + for _ in 0..rollouts_per_thread { + if mcts.simulate::(&node, &mut state, false).is_none() { + break; + } } }) }) .collect::>(); let mut state = s.clone(); - let extra = self.max_rollouts - num_rollouts * num_threads; - for _ in 0..num_rollouts + extra { - self.simulate::(&root, &mut state, false); + for _ in 0..rollouts_per_thread + 
extra { + if self.simulate::(&root, &mut state, false).is_none() { + break; + } } // Wait for threads. diff --git a/tests/ttt.rs b/tests/ttt.rs index c57451b..ba23bcb 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -30,6 +30,7 @@ fn test_ttt_negamax_vs_random_always_wins_or_draws() { #[test] fn test_ttt_mcts_vs_random_always_wins_or_draws() { let mut s1 = MonteCarloTreeSearch::new(MCTSOptions::default().with_num_threads(1)); + s1.set_max_rollouts(100); let mut s2 = Random::new(); for _ in 0..100 { assert_ne!(battle_royale::(&mut s1, &mut s2), Some(1)); From e5fb61152c4ce21c485c4160a9dde95cf2901f00 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 3 May 2022 16:01:13 -0700 Subject: [PATCH 072/162] Remove unused ShardedTable and its parking_lot dependency. LockfreeTable is 80% of the way to the performance of RacyTable. --- Cargo.toml | 1 - src/strategies/table.rs | 82 ----------------------------------------- 2 files changed, 83 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b9533c1..ba56ecd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,6 @@ license = "MIT" [dependencies] num_cpus = "1.0" -parking_lot = "^0.11" rand = "0.8" rayon = "^1.5" diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 66b155d..bd9de5f 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -1,7 +1,4 @@ -extern crate parking_lot; - use crate::interface::*; -use parking_lot::Mutex; use std::cmp::{max, min}; use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; use std::sync::Arc; @@ -28,7 +25,6 @@ pub(super) struct Entry { #[test] fn test_entry_size() { assert!(std::mem::size_of::>() <= 16); - assert!(std::mem::size_of::>>() <= 20); assert!(std::mem::size_of::>() <= 16); } @@ -156,84 +152,6 @@ impl + ConcurrentTable> Table for Arc { } } -pub(super) struct ShardedTable { - table: Vec>>, - mask: usize, - // Incremented for each iterative deepening run. - // Values from old generations are always overwritten. 
- generation: AtomicU8, -} - -#[allow(dead_code)] -impl ShardedTable { - pub(super) fn new(table_byte_size: usize) -> Self { - let size = (table_byte_size / std::mem::size_of::>>()).next_power_of_two(); - let mask = (size - 1) & !1; - let mut table = Vec::with_capacity(size); - for _ in 0..size { - table.push(Mutex::new(Entry:: { - high_hash: 0, - value: 0, - depth: 0, - flag: EntryFlag::Exact, - generation: 0, - best_move: None, - })); - } - Self { table, mask, generation: AtomicU8::new(0) } - } -} - -impl Table for ShardedTable { - fn lookup(&self, hash: u64) -> Option> { - let index = (hash as usize) & self.mask; - for i in index..index + 2 { - let entry = self.table[i].lock(); - if high_bits(hash) == entry.high_hash { - return Some(*entry); - } - } - None - } - fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { - self.concurrent_store(hash, value, depth, flag, best_move) - } - fn advance_generation(&mut self) { - self.concurrent_advance_generation() - } -} - -impl ConcurrentTable for ShardedTable { - fn concurrent_store( - &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, - ) { - let table_gen = self.generation.load(Ordering::Relaxed); - // index points to the first of a pair of entries, the depth-preferred entry and the always-replace entry. - let index = (hash as usize) & self.mask; - let new_entry = Entry { - high_hash: high_bits(hash), - value, - depth, - flag, - generation: table_gen, - best_move: Some(best_move), - }; - { - let mut entry = self.table[index].lock(); - if entry.generation != table_gen || entry.depth <= depth { - *entry = new_entry; - return; - } - } - // Otherwise, always overwrite second entry. - *self.table[index + 1].lock() = new_entry; - } - - fn concurrent_advance_generation(&self) { - self.generation.fetch_add(1, Ordering::SeqCst); - } -} - // A concurrent table that doesn't bother to use atomic operations to access its entries. 
// It's crazily unsafe, but somehow StockFish gets away with this? pub(super) struct RacyTable { From 0f8242908db58321f9d7cbd9efd9715fdb4766e3 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 3 May 2022 21:34:54 -0700 Subject: [PATCH 073/162] Implement alternating depth searches on LazySmp helper threads. This seems to go totally against what negamax is supposed to be computing but it seems really common so I added the option. --- src/strategies/lazy_smp.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 2a37c70..7689ae7 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -25,14 +25,14 @@ use std::time::{Duration, Instant}; pub struct LazySmpOptions { // Default is one per core. num_threads: Option, + differing_depths: bool, // TODO: optional bonus thread local TT? // TODO: min_TT_depth? - // TODO: alternating depths in alternating threads } impl LazySmpOptions { pub fn new() -> Self { - LazySmpOptions { num_threads: None } + LazySmpOptions { num_threads: None, differing_depths: false } } } @@ -48,6 +48,12 @@ impl LazySmpOptions { self.num_threads = Some(num_threads); self } + + /// Enables the helper threads to explore the tree at multiple depths simultaneously. + pub fn with_differing_depths(mut self) -> Self { + self.differing_depths = true; + self + } } #[derive(Clone)] @@ -108,6 +114,7 @@ where command: Arc::S>>>, waiter: Arc, stats: Arc, + extra_depth: u8, } impl Helper @@ -142,12 +149,13 @@ where prev_hash = search.state.zobrist_hash(); prev_depth = search.depth; + let depth = search.depth + self.extra_depth; self.negamaxer.set_timeout(search.timeout.clone()); let mut alpha = WORST_EVAL; let mut beta = BEST_EVAL; self.negamaxer.table.check( search.state.zobrist_hash(), - search.depth, + depth, &mut None, &mut alpha, &mut beta, @@ -160,9 +168,7 @@ where // Negamax search the rest. 
for m in moves { m.apply(&mut search.state); - if let Some(value) = - self.negamaxer.negamax(&mut search.state, search.depth, alpha, beta) - { + if let Some(value) = self.negamaxer.negamax(&mut search.state, depth, alpha, beta) { alpha = max(alpha, -value); } else { break; @@ -230,19 +236,21 @@ where let signal = Arc::new(Condvar::new()); let stats = Arc::new(SharedStats::new()); // start n-1 helper threads - for _ in 1..smp_opts.num_threads.unwrap_or_else(num_cpus::get) { + for iter in 1..smp_opts.num_threads.unwrap_or_else(num_cpus::get) { let table2 = table.clone(); let eval2 = eval.clone(); let opts2 = opts.clone(); let command2 = command.clone(); let waiter = signal.clone(); let stats2 = stats.clone(); + let extra_depth = if smp_opts.differing_depths { iter as u8 & 1 } else { 0 }; spawn(move || { let mut helper = Helper { negamaxer: Negamaxer::new(table2, eval2, opts2), command: command2, waiter, stats: stats2, + extra_depth, }; helper.process(); }); From 76f47490a4e49a88432f399830f3905572e092ff Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 3 May 2022 21:43:04 -0700 Subject: [PATCH 074/162] Release 0.2.1 This adds multiple new Strategies: - ParallelYbw implements iterative deepening by parallelizing the search with rayon. - LazySmp implements iterative deepening with extra threads working on other parts of the tree. - MonteCarloTreeSearch can choose moves with random rollouts without an evaluation function. --- Cargo.toml | 2 +- README.md | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ba56ecd..1b9b289 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.2.0" +version = "0.2.1" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." 
documentation = "https://docs.rs/minimax" diff --git a/README.md b/README.md index 2146793..1fe1cfe 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,12 @@ This library provides interfaces that describe: 2. methods of evaluating particular game states for a player; and 3. strategies for choosing moves for a player. -The eventual goal is to have multiple proper strategies, so that any -combination of evaluators and strategies can be tested against each -other. Currently, single-threaded algorithms using alpha-beta pruning, -iterative deeping, and transposition tables are provided. +This crate implements multiple different strategies, so that any combination of +custom evaluators and strategies can be tested against each other. These include +single- and multi-threaded algorithms using alpha-beta pruning, iterative +deepening, and transposition tables. There is also a basic implementation of +multi-threaded Monte Carlo Tree Search, which does not require writing an +evaluator. ## Example From 8f336f1ec4bf394376ca3bbd7c51d9a7377c0dd8 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 4 May 2022 13:13:19 -0700 Subject: [PATCH 075/162] Some basic verbosity setting. --- src/strategies/iterative.rs | 10 +++++++++- src/strategies/lazy_smp.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 35d77f3..8d1edd8 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -109,7 +109,7 @@ impl Table for TranspositionTable { } } -/// Options to use for the iterative search engine. +/// Options to use for the iterative search engines. 
#[derive(Clone, Copy)] pub struct IterativeOptions { pub(super) table_byte_size: usize, @@ -119,6 +119,7 @@ pub struct IterativeOptions { pub(super) step_increment: u8, pub(super) max_quiescence_depth: u8, pub(super) min_reorder_moves_depth: u8, + pub(super) verbose: bool, } impl IterativeOptions { @@ -131,6 +132,7 @@ impl IterativeOptions { step_increment: 1, max_quiescence_depth: 0, min_reorder_moves_depth: u8::MAX, + verbose: false, } } } @@ -193,6 +195,12 @@ impl IterativeOptions { self.min_reorder_moves_depth = depth; self } + + /// Enable verbose print statements of the ongoing performance of the search. + pub fn verbose(mut self) -> Self { + self.verbose = true; + self + } } pub(super) struct Negamaxer { diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 7689ae7..1182bac 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -336,10 +336,15 @@ where let root_hash = s.zobrist_hash(); let mut s_clone = s.clone(); let mut best_move = None; + let mut interval_start = start_time; let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { // First, a serial aspiration search to at least establish some bounds. + if self.opts.verbose { + interval_start = Instant::now(); + println!("LazySmp search depth {} around {}", depth + 1, self.prev_value); + } if self .negamaxer .aspiration_search( @@ -353,6 +358,20 @@ where // Timeout. 
break; } + if self.opts.verbose { + let mut alpha = WORST_EVAL; + let mut beta = BEST_EVAL; + self.negamaxer.table.check(root_hash, depth + 1, &mut None, &mut alpha, &mut beta); + let end = Instant::now(); + let interval = end - interval_start; + println!( + "LazySmp aspiration search took {}ms; within bounds {}:{}", + interval.as_millis(), + alpha, + beta + ); + interval_start = end; + } let iteration_done = Arc::new(AtomicBool::new(false)); { @@ -375,6 +394,15 @@ where break; } + if self.opts.verbose { + let interval = Instant::now() - interval_start; + println!( + "LazySmp full search took {}ms; returned {:?}", + interval.as_millis(), + value.unwrap() + ); + } + let entry = self.table.lookup(root_hash).unwrap(); best_move = entry.best_move; From 8439f10ca8cd65a72f0675dea1cba0043c0275d8 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 5 May 2022 21:36:44 -0700 Subject: [PATCH 076/162] Encapsulate LazySmp helper thread communication into CommandSignal. --- src/strategies/lazy_smp.rs | 92 ++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 1182bac..cc5da79 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -111,8 +111,7 @@ where ::M: Copy + Eq, { negamaxer: Negamaxer::M>>>, - command: Arc::S>>>, - waiter: Arc, + signal: Arc::S>>, stats: Arc, extra_depth: u8, } @@ -127,10 +126,11 @@ where let mut prev_depth: u8 = 200; loop { let mut search = { - let command = self.command.lock().unwrap(); + let command = self.signal.command.lock().unwrap(); // Stay waiting during Wait command or if we already completed Search command. 
let command = self - .waiter + .signal + .signal .wait_while(command, |c| match *c { Command::Exit => false, Command::Wait => true, @@ -185,6 +185,50 @@ where } } +struct CommandSignal { + command: Mutex>, + signal: Condvar, +} + +impl CommandSignal +where + S: Clone, +{ + fn new() -> Self { + Self { command: Mutex::new(Command::Wait), signal: Condvar::new() } + } + + fn update(&self, new_command: Command) { + let mut command = self.command.lock().unwrap(); + if let Command::Search(ref search) = *command { + search.timeout.store(true, Ordering::SeqCst); + } + *command = new_command; + self.signal.notify_all(); + } + + fn wait(&self) { + self.update(Command::Wait); + } + + fn new_search(&self, state: &S, depth: u8) { + self.update(Command::Search(Search { + state: state.clone(), + depth, + timeout: Arc::new(AtomicBool::new(false)), + })); + } +} + +impl Drop for CommandSignal +where + S: Clone, +{ + fn drop(&mut self) { + self.update(Command::Exit); + } +} + pub struct LazySmp where ::S: Clone + Zobrist, @@ -194,8 +238,7 @@ where max_time: Duration, table: Arc::M>>, negamaxer: Negamaxer::M>>>, - command: Arc::S>>>, - signal: Arc, + signal: Arc::S>>, opts: IterativeOptions, @@ -210,17 +253,6 @@ where wall_time: Duration, } -impl Drop for LazySmp -where - ::S: Clone + Zobrist, - ::M: Copy + Eq, -{ - fn drop(&mut self) { - *self.command.lock().unwrap() = Command::Exit; - self.signal.notify_all(); - } -} - impl LazySmp where ::S: Clone + Zobrist + Send, @@ -232,23 +264,20 @@ where E: 'static, { let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); - let command = Arc::new(Mutex::new(Command::Wait)); - let signal = Arc::new(Condvar::new()); let stats = Arc::new(SharedStats::new()); + let signal = Arc::new(CommandSignal::new()); // start n-1 helper threads for iter in 1..smp_opts.num_threads.unwrap_or_else(num_cpus::get) { let table2 = table.clone(); let eval2 = eval.clone(); let opts2 = opts.clone(); - let command2 = command.clone(); - let waiter = 
signal.clone(); + let signal2 = signal.clone(); let stats2 = stats.clone(); let extra_depth = if smp_opts.differing_depths { iter as u8 & 1 } else { 0 }; spawn(move || { let mut helper = Helper { negamaxer: Negamaxer::new(table2, eval2, opts2), - command: command2, - waiter, + signal: signal2, stats: stats2, extra_depth, }; @@ -261,7 +290,6 @@ where max_time: Duration::from_secs(5), table, negamaxer, - command, signal, prev_value: 0, opts, @@ -373,22 +401,10 @@ where interval_start = end; } - let iteration_done = Arc::new(AtomicBool::new(false)); - { - let mut command = self.command.lock().unwrap(); - *command = Command::Search(Search { - state: s.clone(), - depth, - timeout: iteration_done.clone(), - }); - self.signal.notify_all(); - } + self.signal.new_search(&s, depth); let value = self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL); - { - *self.command.lock().unwrap() = Command::Wait; - } - iteration_done.store(true, Ordering::Relaxed); + self.signal.wait(); if value.is_none() { // Timeout. Return the best move from the previous depth. break; From bbe1fe4889648b166f9db2c2e7a719dde772c1d4 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 5 May 2022 22:10:56 -0700 Subject: [PATCH 077/162] Parallelize aspiration search in LazySmp. Also make aspiration search off by default. 
--- src/strategies/lazy_smp.rs | 89 +++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index cc5da79..781f48d 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -60,6 +60,8 @@ impl LazySmpOptions { struct Search { state: S, depth: u8, + alpha: Evaluation, + beta: Evaluation, timeout: Arc, } @@ -124,6 +126,8 @@ where fn process(&mut self) { let mut prev_hash: u64 = 0; let mut prev_depth: u8 = 200; + let mut prev_alpha = 0; + let mut prev_beta = 0; loop { let mut search = { let command = self.signal.command.lock().unwrap(); @@ -135,7 +139,10 @@ where Command::Exit => false, Command::Wait => true, Command::Search(ref search) => { - search.state.zobrist_hash() == prev_hash && search.depth == prev_depth + search.state.zobrist_hash() == prev_hash + && search.depth == prev_depth + && prev_alpha == search.alpha + && prev_beta == search.beta } }) .unwrap(); @@ -148,11 +155,13 @@ where }; prev_hash = search.state.zobrist_hash(); prev_depth = search.depth; + prev_alpha = search.alpha; + prev_beta = search.beta; let depth = search.depth + self.extra_depth; self.negamaxer.set_timeout(search.timeout.clone()); - let mut alpha = WORST_EVAL; - let mut beta = BEST_EVAL; + let mut alpha = search.alpha; + let mut beta = search.beta; self.negamaxer.table.check( search.state.zobrist_hash(), depth, @@ -211,10 +220,12 @@ where self.update(Command::Wait); } - fn new_search(&self, state: &S, depth: u8) { + fn new_search(&self, state: &S, depth: u8, alpha: Evaluation, beta: Evaluation) { self.update(Command::Search(Search { state: state.clone(), depth, + alpha, + beta, timeout: Arc::new(AtomicBool::new(false)), })); } @@ -368,43 +379,52 @@ where let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { - // First, a serial aspiration search to at least establish some bounds. 
if self.opts.verbose { interval_start = Instant::now(); - println!("LazySmp search depth {} around {}", depth + 1, self.prev_value); + println!("LazySmp search depth {}", depth + 1); } - if self - .negamaxer - .aspiration_search( - &mut s_clone, - depth + 1, - self.prev_value, - self.opts.aspiration_window.unwrap_or(2), - ) - .is_none() - { - // Timeout. - break; - } - if self.opts.verbose { - let mut alpha = WORST_EVAL; - let mut beta = BEST_EVAL; - self.negamaxer.table.check(root_hash, depth + 1, &mut None, &mut alpha, &mut beta); - let end = Instant::now(); - let interval = end - interval_start; - println!( - "LazySmp aspiration search took {}ms; within bounds {}:{}", - interval.as_millis(), - alpha, - beta - ); - interval_start = end; + if let Some(window) = self.opts.aspiration_window { + // First, parallel aspiration search to at least establish some bounds. + let mut alpha = self.prev_value.saturating_sub(window); + if alpha < WORST_EVAL { + alpha = WORST_EVAL; + } + let mut beta = self.prev_value.saturating_add(window); + self.signal.new_search(&s, depth, alpha, beta); + + if self + .negamaxer + .aspiration_search(&mut s_clone, depth + 1, self.prev_value, window) + .is_none() + { + // Timeout. + break; + } + if self.opts.verbose { + alpha = WORST_EVAL; + beta = BEST_EVAL; + self.negamaxer.table.check( + root_hash, + depth + 1, + &mut None, + &mut alpha, + &mut beta, + ); + let end = Instant::now(); + let interval = end - interval_start; + println!( + "LazySmp aspiration search took {}ms; within bounds {}:{}", + interval.as_millis(), + alpha, + beta + ); + interval_start = end; + } } - self.signal.new_search(&s, depth); + self.signal.new_search(&s, depth, WORST_EVAL, BEST_EVAL); let value = self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL); - self.signal.wait(); if value.is_none() { // Timeout. Return the best move from the previous depth. 
break; @@ -429,6 +449,7 @@ where self.shared_stats.update(&mut self.negamaxer); self.nodes_explored.push(self.shared_stats.reset_nodes_explored()); } + self.signal.wait(); self.wall_time = start_time.elapsed(); best_move } From d4921b21d682fc56e8b33709b6ce26761bf543af Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 6 May 2022 22:37:05 -0700 Subject: [PATCH 078/162] New iterative option based on MTD(f) algorithm. --- src/strategies/iterative.rs | 54 +++++++++++++++++++++++++++++++++---- tests/strategies.rs | 8 ++++++ 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 8d1edd8..de1dad4 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -116,6 +116,7 @@ pub struct IterativeOptions { pub(super) strategy: Replacement, pub(super) null_window_search: bool, pub(super) aspiration_window: Option, + pub(super) mtdf: bool, pub(super) step_increment: u8, pub(super) max_quiescence_depth: u8, pub(super) min_reorder_moves_depth: u8, @@ -129,6 +130,7 @@ impl IterativeOptions { strategy: Replacement::TwoTier, null_window_search: true, aspiration_window: None, + mtdf: false, step_increment: 1, max_quiescence_depth: 0, min_reorder_moves_depth: u8::MAX, @@ -172,6 +174,15 @@ impl IterativeOptions { self } + /// Whether to search for the correct value in each iteration using only + /// null-window "Tests", with the + /// [MTD(f)](https://en.wikipedia.org/wiki/MTD%28f%29) algorithm. + /// Can be more efficient if the evaluation function is coarse grained. + pub fn with_mtdf(mut self) -> Self { + self.mtdf = true; + self + } + /// Increment the depth by two between iterations. pub fn with_double_step_increment(mut self) -> Self { self.step_increment = 2; @@ -463,6 +474,29 @@ where pub fn principal_variation(&self) -> &[::M] { &self.pv[..] 
} + + fn mtdf( + &mut self, s: &mut ::S, depth: u8, mut guess: Evaluation, + ) -> Option { + let mut lowerbound = WORST_EVAL; + let mut upperbound = BEST_EVAL; + while lowerbound < upperbound { + let beta = max(lowerbound + 1, guess); + if self.opts.verbose { + println!( + "mtdf depth={} guess={} bounds={}:{}", + depth, beta, lowerbound, upperbound + ); + } + guess = self.negamaxer.negamax(s, depth, beta - 1, beta)?; + if guess < beta { + upperbound = guess; + } else { + lowerbound = guess; + } + } + Some(guess) + } } impl Strategy for IterativeSearch @@ -490,11 +524,21 @@ where let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { - if let Some(window) = self.opts.aspiration_window { - // Results of the search are stored in the table. - self.negamaxer.aspiration_search(&mut s_clone, depth + 1, self.prev_value, window); - } - if self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { + let search = if self.opts.mtdf { + self.mtdf(&mut s_clone, depth + 1, self.prev_value) + } else { + if let Some(window) = self.opts.aspiration_window { + // Results of the search are stored in the table. + self.negamaxer.aspiration_search( + &mut s_clone, + depth + 1, + self.prev_value, + window, + ); + } + self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL) + }; + if search.is_none() { // Timeout. Return the best move from the previous depth. break; } diff --git a/tests/strategies.rs b/tests/strategies.rs index 2810b92..03dbb48 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -151,6 +151,7 @@ fn compare_plain_negamax() { IterativeOptions::new() .with_replacement_strategy(Replacement::TwoTier) .with_aspiration_window(5), + IterativeOptions::new().with_replacement_strategy(Replacement::TwoTier).with_mtdf(), ] .drain(..) 
.enumerate() @@ -226,6 +227,13 @@ fn compare_deep_negamax() { let iterative_value = iterative.root_value(); assert_eq!(value, iterative_value, "search depth={}\n{}", max_depth, b); + let mut mtdf = + IterativeSearch::new(RandomEvaluator::default(), opt.clone().with_mtdf()); + mtdf.set_max_depth(max_depth); + mtdf.choose_move(&b).unwrap(); + let mtdf_value = mtdf.root_value(); + assert_eq!(value, mtdf_value, "search depth={}\n{}", max_depth, b); + let mut parallel = ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); parallel.set_max_depth(max_depth); From a265e537da8edcb6db4bc8584b4e47045bc4447e Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sat, 7 May 2022 22:00:33 -0700 Subject: [PATCH 079/162] Add verbosity to IterativeSearch and clean it up a little. --- src/strategies/iterative.rs | 44 ++++++++++++++++++++++---- src/strategies/lazy_smp.rs | 61 +++++++++++++++++++------------------ src/strategies/table.rs | 17 +++++++++++ 3 files changed, 86 insertions(+), 36 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index de1dad4..6a849d3 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -521,21 +521,41 @@ where let root_hash = s.zobrist_hash(); let mut s_clone = s.clone(); let mut best_move = None; + let mut interval_start = start_time; let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { + if self.opts.verbose { + interval_start = Instant::now(); + println!("Iterative search depth {}", depth + 1); + } let search = if self.opts.mtdf { self.mtdf(&mut s_clone, depth + 1, self.prev_value) } else { if let Some(window) = self.opts.aspiration_window { // Results of the search are stored in the table. - self.negamaxer.aspiration_search( - &mut s_clone, - depth + 1, - self.prev_value, - window, - ); + if self + .negamaxer + .aspiration_search(&mut s_clone, depth + 1, self.prev_value, window) + .is_none() + { + // Timeout. 
+ break; + } + } + if self.opts.verbose { + if let Some(entry) = self.negamaxer.table.lookup(root_hash) { + let end = Instant::now(); + let interval = end - interval_start; + println!( + "Iterative aspiration search took {}ms; value {}", + interval.as_millis(), + entry.bounds(), + ); + interval_start = end; + } } + self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL) }; if search.is_none() { @@ -545,6 +565,15 @@ where let entry = self.negamaxer.table.lookup(root_hash).unwrap(); best_move = entry.best_move; + if self.opts.verbose { + let interval = Instant::now() - interval_start; + println!( + "Iterative full search took {}ms; returned {:?}", + interval.as_millis(), + entry.value + ); + } + self.actual_depth = max(self.actual_depth, depth); self.nodes_explored.push(self.negamaxer.nodes_explored); self.negamaxer.nodes_explored = 0; @@ -553,6 +582,9 @@ where self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); } self.wall_time = start_time.elapsed(); + if self.opts.verbose { + println!("{}", self.stats()); + } best_move } } diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 781f48d..5a26598 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -327,6 +327,23 @@ where self.max_depth = 100; } + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + /// Return what the engine considered to be the best sequence of moves + /// from both sides. + pub fn principal_variation(&self) -> &[::M] { + &self.pv[..] + } +} + +impl LazySmp +where + ::S: Clone + Zobrist, + ::M: Copy + Eq, +{ /// Return a human-readable summary of the last move generation. 
pub fn stats(&self) -> String { let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); @@ -340,17 +357,6 @@ where total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, self.negamaxer.nodes_explored, throughput as usize) } - - #[doc(hidden)] - pub fn root_value(&self) -> Evaluation { - unclamp_value(self.prev_value) - } - - /// Return what the engine considered to be the best sequence of moves - /// from both sides. - pub fn principal_variation(&self) -> &[::M] { - &self.pv[..] - } } impl Strategy for LazySmp @@ -389,7 +395,7 @@ where if alpha < WORST_EVAL { alpha = WORST_EVAL; } - let mut beta = self.prev_value.saturating_add(window); + let beta = self.prev_value.saturating_add(window); self.signal.new_search(&s, depth, alpha, beta); if self @@ -401,24 +407,16 @@ where break; } if self.opts.verbose { - alpha = WORST_EVAL; - beta = BEST_EVAL; - self.negamaxer.table.check( - root_hash, - depth + 1, - &mut None, - &mut alpha, - &mut beta, - ); - let end = Instant::now(); - let interval = end - interval_start; - println!( - "LazySmp aspiration search took {}ms; within bounds {}:{}", - interval.as_millis(), - alpha, - beta - ); - interval_start = end; + if let Some(entry) = self.table.lookup(root_hash) { + let end = Instant::now(); + let interval = end - interval_start; + println!( + "LazySmp aspiration search took {}ms; value {}", + interval.as_millis(), + entry.bounds(), + ); + interval_start = end; + } } } @@ -451,6 +449,9 @@ where } self.signal.wait(); self.wall_time = start_time.elapsed(); + if self.opts.verbose { + println!("{}", self.stats()); + } best_move } } diff --git a/src/strategies/table.rs b/src/strategies/table.rs index bd9de5f..ace4b3a 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -1,3 +1,4 @@ +use super::util::unclamp_value; use crate::interface::*; use std::cmp::{max, min}; use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; @@ -32,6 +33,22 @@ pub(super) fn high_bits(hash: 
u64) -> u32 { (hash >> 32) as u32 } +impl Entry { + pub(super) fn bounds(&self) -> String { + match self.flag { + EntryFlag::Exact => "=", + EntryFlag::Upperbound => "≤", + EntryFlag::Lowerbound => "≥", + } + .to_string() + + &match unclamp_value(self.value) { + WORST_EVAL => "-∞".to_owned(), + BEST_EVAL => "∞".to_owned(), + value => value.to_string(), + } + } +} + // A trait for a transposition table. The methods are mutual exclusion, but // the idea is that an implementation can wrap a shared concurrent table. pub(super) trait Table { From 0d9045d3251ae4e6ee559e3895f22085bf0a6448 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 12 May 2022 14:16:38 -0700 Subject: [PATCH 080/162] Various improvements to verbose mode. * Use stderr * Show hash of best move at each depth to see how it changes. * Don't dump hundreds of lines of garbage at the end of the game. --- src/strategies/iterative.rs | 40 +++++++++++++++++++++++---------- src/strategies/lazy_smp.rs | 44 ++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 6a849d3..bf9b741 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -475,6 +475,18 @@ where &self.pv[..] } + // Return a unique id for humans for this move. 
+ fn move_id(&self, s: &mut ::S, m: Option<::M>) -> String { + if let Some(mov) = m { + mov.apply(s); + let id = format!("{:06x}", s.zobrist_hash() & 0xffffff); + mov.undo(s); + id + } else { + "none".to_string() + } + } + fn mtdf( &mut self, s: &mut ::S, depth: u8, mut guess: Evaluation, ) -> Option { @@ -483,7 +495,7 @@ where while lowerbound < upperbound { let beta = max(lowerbound + 1, guess); if self.opts.verbose { - println!( + eprintln!( "mtdf depth={} guess={} bounds={}:{}", depth, beta, lowerbound, upperbound ); @@ -522,12 +534,13 @@ where let mut s_clone = s.clone(); let mut best_move = None; let mut interval_start = start_time; + let mut maxxed = false; let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { - if self.opts.verbose { + if self.opts.verbose && !maxxed { interval_start = Instant::now(); - println!("Iterative search depth {}", depth + 1); + eprintln!("Iterative search depth {}", depth + 1); } let search = if self.opts.mtdf { self.mtdf(&mut s_clone, depth + 1, self.prev_value) @@ -543,14 +556,15 @@ where break; } } - if self.opts.verbose { + if self.opts.verbose && !maxxed { if let Some(entry) = self.negamaxer.table.lookup(root_hash) { let end = Instant::now(); let interval = end - interval_start; - println!( - "Iterative aspiration search took {}ms; value {}", + eprintln!( + "Iterative aspiration search took {}ms; value{} bestmove={}", interval.as_millis(), entry.bounds(), + self.move_id(&mut s_clone, entry.best_move) ); interval_start = end; } @@ -565,13 +579,17 @@ where let entry = self.negamaxer.table.lookup(root_hash).unwrap(); best_move = entry.best_move; - if self.opts.verbose { + if self.opts.verbose && !maxxed { let interval = Instant::now() - interval_start; - println!( - "Iterative full search took {}ms; returned {:?}", + eprintln!( + "Iterative full search took {}ms; returned {:?} bestmove={}", interval.as_millis(), - entry.value + entry.value, + self.move_id(&mut s_clone, best_move) 
); + if unclamp_value(entry.value).abs() == BEST_EVAL { + maxxed = true; + } } self.actual_depth = max(self.actual_depth, depth); @@ -583,7 +601,7 @@ where } self.wall_time = start_time.elapsed(); if self.opts.verbose { - println!("{}", self.stats()); + eprintln!("{}", self.stats()); } best_move } diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 5a26598..6463024 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -357,6 +357,18 @@ where total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, self.negamaxer.nodes_explored, throughput as usize) } + + // Return a unique id for humans for this move. + fn move_id(&self, s: &mut ::S, m: Option<::M>) -> String { + if let Some(mov) = m { + mov.apply(s); + let id = format!("{:06x}", s.zobrist_hash() & 0xffffff); + mov.undo(s); + id + } else { + "none".to_string() + } + } } impl Strategy for LazySmp @@ -382,12 +394,13 @@ where let mut s_clone = s.clone(); let mut best_move = None; let mut interval_start = start_time; + let mut maxxed = false; let mut depth = self.max_depth as u8 % self.opts.step_increment; while depth <= self.max_depth as u8 { - if self.opts.verbose { + if self.opts.verbose && !maxxed { interval_start = Instant::now(); - println!("LazySmp search depth {}", depth + 1); + eprintln!("LazySmp search depth {}", depth + 1); } if let Some(window) = self.opts.aspiration_window { // First, parallel aspiration search to at least establish some bounds. @@ -406,14 +419,15 @@ where // Timeout. 
break; } - if self.opts.verbose { + if self.opts.verbose && !maxxed { if let Some(entry) = self.table.lookup(root_hash) { let end = Instant::now(); let interval = end - interval_start; - println!( - "LazySmp aspiration search took {}ms; value {}", + eprintln!( + "LazySmp aspiration search took {}ms; value {} bestmove={}", interval.as_millis(), entry.bounds(), + self.move_id(&mut s_clone, entry.best_move) ); interval_start = end; } @@ -428,18 +442,22 @@ where break; } - if self.opts.verbose { + let entry = self.table.lookup(root_hash).unwrap(); + best_move = entry.best_move; + + if self.opts.verbose && !maxxed { let interval = Instant::now() - interval_start; - println!( - "LazySmp full search took {}ms; returned {:?}", + eprintln!( + "LazySmp full search took {}ms; returned {:?} bestmove={}", interval.as_millis(), - value.unwrap() + value.unwrap(), + self.move_id(&mut s_clone, entry.best_move) ); + if unclamp_value(value.unwrap()).abs() == BEST_EVAL { + maxxed = true; + } } - let entry = self.table.lookup(root_hash).unwrap(); - best_move = entry.best_move; - self.actual_depth = max(self.actual_depth, depth); self.prev_value = entry.value; depth += self.opts.step_increment; @@ -450,7 +468,7 @@ where self.signal.wait(); self.wall_time = start_time.elapsed(); if self.opts.verbose { - println!("{}", self.stats()); + eprintln!("{}", self.stats()); } best_move } From 2715deeecf8fc7feedf1ee1b1fc5c765fe9729a3 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 13 May 2022 21:38:13 -0700 Subject: [PATCH 081/162] Add human-readable move stats and pv to verbose mode. 
--- src/interface.rs | 4 ++++ src/strategies/iterative.rs | 24 +++++++---------------- src/strategies/lazy_smp.rs | 24 +++++++---------------- src/strategies/util.rs | 38 +++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 34 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index d1f502d..b86ee23 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -57,6 +57,10 @@ pub trait Move { fn apply(&self, state: &mut ::S); /// Revert the state of `S` so that the move is undone. fn undo(&self, state: &mut ::S); + /// Return a human-readable notation for this move in this game state. + fn notation(&self, _state: &::S) -> Option { + None + } } /// The result of playing a game until it finishes. diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index bf9b741..baa77a7 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -451,7 +451,7 @@ where } /// Return a human-readable summary of the last move generation. - pub fn stats(&self) -> String { + pub fn stats(&self, s: &mut ::S) -> String { let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); let mean_branching_factor = self.negamaxer.total_generated_moves as f64 / self.negamaxer.total_generate_move_calls as f64; @@ -459,7 +459,8 @@ where .powf((self.actual_depth as f64 + 1.0).recip()); let throughput = (total_nodes_explored + self.negamaxer.nodes_explored) as f64 / self.wall_time.as_secs_f64(); - format!("Explored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", + format!("Principal variation: {}\nExplored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", + pv_string::(&self.pv[..], s), total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, self.negamaxer.nodes_explored, throughput as usize) } @@ -475,18 +476,6 @@ where &self.pv[..] } - // Return a unique id for humans for this move. 
- fn move_id(&self, s: &mut ::S, m: Option<::M>) -> String { - if let Some(mov) = m { - mov.apply(s); - let id = format!("{:06x}", s.zobrist_hash() & 0xffffff); - mov.undo(s); - id - } else { - "none".to_string() - } - } - fn mtdf( &mut self, s: &mut ::S, depth: u8, mut guess: Evaluation, ) -> Option { @@ -564,7 +553,7 @@ where "Iterative aspiration search took {}ms; value{} bestmove={}", interval.as_millis(), entry.bounds(), - self.move_id(&mut s_clone, entry.best_move) + move_id::(&mut s_clone, entry.best_move) ); interval_start = end; } @@ -585,7 +574,7 @@ where "Iterative full search took {}ms; returned {:?} bestmove={}", interval.as_millis(), entry.value, - self.move_id(&mut s_clone, best_move) + move_id::(&mut s_clone, best_move) ); if unclamp_value(entry.value).abs() == BEST_EVAL { maxxed = true; @@ -601,7 +590,8 @@ where } self.wall_time = start_time.elapsed(); if self.opts.verbose { - eprintln!("{}", self.stats()); + let mut s_clone = s.clone(); + eprintln!("{}", self.stats(&mut s_clone)); } best_move } diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 6463024..58ae3ff 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -345,7 +345,7 @@ where ::M: Copy + Eq, { /// Return a human-readable summary of the last move generation. - pub fn stats(&self) -> String { + pub fn stats(&self, s: &mut ::S) -> String { let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); let mean_branching_factor = self.shared_stats.generated_moves.load(Ordering::SeqCst) as f64 / self.shared_stats.generate_move_calls.load(Ordering::SeqCst) as f64; @@ -353,22 +353,11 @@ where .powf((self.actual_depth as f64 + 1.0).recip()); let throughput = (total_nodes_explored + self.negamaxer.nodes_explored) as f64 / self.wall_time.as_secs_f64(); - format!("Explored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", + format!("Principal variation: {}\nExplored {} nodes to depth {}. 
MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", + pv_string::(&self.pv[..], s), total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, self.negamaxer.nodes_explored, throughput as usize) } - - // Return a unique id for humans for this move. - fn move_id(&self, s: &mut ::S, m: Option<::M>) -> String { - if let Some(mov) = m { - mov.apply(s); - let id = format!("{:06x}", s.zobrist_hash() & 0xffffff); - mov.undo(s); - id - } else { - "none".to_string() - } - } } impl Strategy for LazySmp @@ -427,7 +416,7 @@ where "LazySmp aspiration search took {}ms; value {} bestmove={}", interval.as_millis(), entry.bounds(), - self.move_id(&mut s_clone, entry.best_move) + move_id::(&mut s_clone, entry.best_move) ); interval_start = end; } @@ -451,7 +440,7 @@ where "LazySmp full search took {}ms; returned {:?} bestmove={}", interval.as_millis(), value.unwrap(), - self.move_id(&mut s_clone, entry.best_move) + move_id::(&mut s_clone, entry.best_move) ); if unclamp_value(value.unwrap()).abs() == BEST_EVAL { maxxed = true; @@ -468,7 +457,8 @@ where self.signal.wait(); self.wall_time = start_time.elapsed(); if self.opts.verbose { - eprintln!("{}", self.stats()); + let mut s_clone = s.clone(); + eprintln!("{}", self.stats(&mut s_clone)); } best_move } diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 0092dd1..3ec03b2 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -41,6 +41,44 @@ pub(super) fn timeout_signal(dur: Duration) -> Arc { signal } +// Return a unique id for humans for this move. 
+pub(super) fn move_id(s: &mut ::S, m: Option<::M>) -> String +where + ::S: Zobrist, +{ + if let Some(mov) = m { + if let Some(notation) = mov.notation(s) { + notation + } else { + mov.apply(s); + let id = format!("{:06x}", s.zobrist_hash() & 0xffffff); + mov.undo(s); + id + } + } else { + "none".to_string() + } +} + +pub(super) fn pv_string(path: &[::M], s: &mut ::S) -> String +where + ::S: Zobrist, + ::M: Copy, +{ + let mut out = String::new(); + for (i, m) in (0..).zip(path.iter()) { + if i > 0 { + out.push_str("; "); + } + out.push_str(move_id::(s, Some(*m)).as_str()); + m.apply(s); + } + for m in path.iter().rev() { + m.undo(s); + } + out +} + // This exists to be wrapped in a mutex, because it didn't work when I tried a tuple. pub(super) struct ValueMove { value: Evaluation, From 4f5a677f327cd7395ff915928099f1a7f6a2ac63 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 18 May 2022 11:35:24 -0700 Subject: [PATCH 082/162] Use depth the same way everywhere, and also change the depth API by 1. Now depth has the more commonsense meaning of ply. Search at depth 0 is meaningless. --- src/strategies/iterative.rs | 20 ++++++++++++-------- src/strategies/lazy_smp.rs | 19 ++++++++++++------- src/strategies/negamax.rs | 5 ++++- src/strategies/ybw.rs | 11 +++++++---- tests/strategies.rs | 10 +++++----- 5 files changed, 40 insertions(+), 25 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index baa77a7..a886e09 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -1,6 +1,6 @@ //! An implementation of iterative deepening evaluation. //! -//! Search and evaluate at depth 0, then start over at depth 1, then depth 2, +//! Search and evaluate at depth 1, then start over at depth 2, then depth 3, //! etc. Can keep going until a maximum depth or maximum time or either. Uses //! a transposition table to reuse information from previous iterations. 
@@ -423,7 +423,7 @@ where let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); let negamaxer = Negamaxer::new(table, eval, opts.clone()); IterativeSearch { - max_depth: 100, + max_depth: 99, max_time: Duration::from_secs(5), prev_value: 0, negamaxer, @@ -447,7 +447,7 @@ where /// iteration. Unlimited max depth. pub fn set_timeout(&mut self, max_time: Duration) { self.max_time = max_time; - self.max_depth = 100; + self.max_depth = 99; } /// Return a human-readable summary of the last move generation. @@ -525,20 +525,24 @@ where let mut interval_start = start_time; let mut maxxed = false; + // Start at 1 or 2 to hit the max depth. let mut depth = self.max_depth as u8 % self.opts.step_increment; + if depth == 0 { + depth = self.opts.step_increment; + } while depth <= self.max_depth as u8 { if self.opts.verbose && !maxxed { interval_start = Instant::now(); - eprintln!("Iterative search depth {}", depth + 1); + eprintln!("Iterative search depth {}", depth); } let search = if self.opts.mtdf { - self.mtdf(&mut s_clone, depth + 1, self.prev_value) + self.mtdf(&mut s_clone, depth, self.prev_value) } else { if let Some(window) = self.opts.aspiration_window { // Results of the search are stored in the table. if self .negamaxer - .aspiration_search(&mut s_clone, depth + 1, self.prev_value, window) + .aspiration_search(&mut s_clone, depth, self.prev_value, window) .is_none() { // Timeout. @@ -559,7 +563,7 @@ where } } - self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL) + self.negamaxer.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL) }; if search.is_none() { // Timeout. Return the best move from the previous depth. 
@@ -586,7 +590,7 @@ where self.negamaxer.nodes_explored = 0; self.prev_value = entry.value; depth += self.opts.step_increment; - self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); + self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone, depth); } self.wall_time = start_time.elapsed(); if self.opts.verbose { diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 58ae3ff..f49e439 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -177,7 +177,9 @@ where // Negamax search the rest. for m in moves { m.apply(&mut search.state); - if let Some(value) = self.negamaxer.negamax(&mut search.state, depth, alpha, beta) { + if let Some(value) = + self.negamaxer.negamax(&mut search.state, depth - 1, alpha, beta) + { alpha = max(alpha, -value); } else { break; @@ -297,7 +299,7 @@ where } let negamaxer = Negamaxer::new(table.clone(), eval, opts.clone()); LazySmp { - max_depth: 100, + max_depth: 99, max_time: Duration::from_secs(5), table, negamaxer, @@ -324,7 +326,7 @@ where /// iteration. Unlimited max depth. pub fn set_timeout(&mut self, max_time: Duration) { self.max_time = max_time; - self.max_depth = 100; + self.max_depth = 99; } #[doc(hidden)] @@ -386,10 +388,13 @@ where let mut maxxed = false; let mut depth = self.max_depth as u8 % self.opts.step_increment; + if depth == 0 { + depth = self.opts.step_increment; + } while depth <= self.max_depth as u8 { if self.opts.verbose && !maxxed { interval_start = Instant::now(); - eprintln!("LazySmp search depth {}", depth + 1); + eprintln!("LazySmp search depth {}", depth); } if let Some(window) = self.opts.aspiration_window { // First, parallel aspiration search to at least establish some bounds. @@ -402,7 +407,7 @@ where if self .negamaxer - .aspiration_search(&mut s_clone, depth + 1, self.prev_value, window) + .aspiration_search(&mut s_clone, depth, self.prev_value, window) .is_none() { // Timeout. 
@@ -425,7 +430,7 @@ where self.signal.new_search(&s, depth, WORST_EVAL, BEST_EVAL); - let value = self.negamaxer.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL); + let value = self.negamaxer.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL); if value.is_none() { // Timeout. Return the best move from the previous depth. break; @@ -450,7 +455,7 @@ where self.actual_depth = max(self.actual_depth, depth); self.prev_value = entry.value; depth += self.opts.step_increment; - self.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); + self.table.populate_pv(&mut self.pv, &mut s_clone, depth); self.shared_stats.update(&mut self.negamaxer); self.nodes_explored.push(self.shared_stats.reset_nodes_explored()); } diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 670e758..7ebe964 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -69,6 +69,9 @@ where ::M: Copy, { fn choose_move(&mut self, s: &::S) -> Option<::M> { + if self.max_depth == 0 { + return None; + } let mut best = WORST_EVAL; let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); @@ -81,7 +84,7 @@ where for &m in moves.iter() { // determine value for this move m.apply(&mut s_clone); - let value = -self.negamax(&mut s_clone, self.max_depth, WORST_EVAL, -best); + let value = -self.negamax(&mut s_clone, self.max_depth - 1, WORST_EVAL, -best); m.undo(&mut s_clone); // Strictly better than any move found so far. if value > best { diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index f7b35c9..c43ea69 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -77,7 +77,7 @@ impl ParallelYbw { pub fn new(eval: E, opts: IterativeOptions, ybw_opts: YbwOptions) -> ParallelYbw { let table = LockfreeTable::new(opts.table_byte_size); ParallelYbw { - max_depth: 100, + max_depth: 99, max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), table, @@ -109,7 +109,7 @@ impl ParallelYbw { /// iteration. 
Unlimited max depth. pub fn set_timeout(&mut self, max_time: Duration) { self.max_time = max_time; - self.max_depth = 100; + self.max_depth = 99; } #[doc(hidden)] @@ -330,8 +330,11 @@ where let mut best_move = None; let mut depth = self.max_depth as u8 % self.opts.step_increment; + if depth == 0 { + depth = self.opts.step_increment; + } while depth <= self.max_depth as u8 { - if self.negamax(&mut s_clone, depth + 1, WORST_EVAL, BEST_EVAL).is_none() { + if self.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. break; } @@ -343,7 +346,7 @@ where self.prev_value = entry.value; self.next_depth_nodes = 0; depth += self.opts.step_increment; - self.table.populate_pv(&mut self.pv, &mut s_clone, depth + 1); + self.table.populate_pv(&mut self.pv, &mut s_clone, depth); } self.wall_time = start_time.elapsed(); best_move diff --git a/tests/strategies.rs b/tests/strategies.rs index 03dbb48..dff97b4 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -65,7 +65,7 @@ where let mut s_clone = s.clone(); for &m in moves.iter() { m.apply(&mut s_clone); - let value = -self.negamax(&mut s_clone, self.depth); + let value = -self.negamax(&mut s_clone, self.depth - 1); m.undo(&mut s_clone); if value == best_value { self.best_moves.push(m); @@ -121,7 +121,7 @@ fn generate_random_state(depth: usize) -> connect4::Board { #[test] fn compare_plain_negamax() { for _ in 0..100 { - for max_depth in 0..5 { + for max_depth in 1..6 { let b = generate_random_state(10); let mut plain_negamax = PlainNegamax::new(RandomEvaluator::default(), max_depth); @@ -213,8 +213,8 @@ fn compare_plain_negamax() { #[test] fn compare_deep_negamax() { let opt = IterativeOptions::new().with_table_byte_size(64000); - for _ in 0..10 { - for max_depth in 0..9 { + for iter in 0..10 { + for max_depth in 1..10 { let b = generate_random_state(10); let mut negamax = Negamax::new(RandomEvaluator, max_depth); @@ -246,7 +246,7 @@ fn 
compare_deep_negamax() { lazysmp.set_max_depth(max_depth); lazysmp.choose_move(&b).unwrap(); let lazysmp_value = lazysmp.root_value(); - assert_eq!(value, lazysmp_value, "search depth={}\n{}", max_depth, b); + assert_eq!(value, lazysmp_value, "search depth={} iter={}\n{}", max_depth, iter, b); } } } From 0b8d915bc6e9184ee98a7fb26782ade265807c88 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 20 May 2022 23:43:39 -0700 Subject: [PATCH 083/162] Add null move pruning option. Yet another impure optimization that deviates from the pure negamax result. I'm not convinced that it totally makes sense, but it's optional anyway. --- src/interface.rs | 8 ++++++ src/strategies/iterative.rs | 51 ++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index b86ee23..8a1d22b 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -130,6 +130,14 @@ pub trait Game: Sized { /// `Some(Draw)` if the state is terminal without a winner, and `None` if /// the state is non-terminal. fn get_winner(state: &Self::S) -> Option; + + /// Optional method to return a move that does not change the board state. + /// This does not need to be a legal move from this position, but it is + /// used in some strategies to reject a position early if even passing gives + /// a good position for the opponent. + fn null_move(_state: &Self::S) -> Option { + None + } } /// Defines a method of choosing a move for the current player. 
diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index a886e09..68ad550 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -115,6 +115,7 @@ pub struct IterativeOptions { pub(super) table_byte_size: usize, pub(super) strategy: Replacement, pub(super) null_window_search: bool, + pub(super) null_move_depth: Option, pub(super) aspiration_window: Option, pub(super) mtdf: bool, pub(super) step_increment: u8, @@ -129,6 +130,7 @@ impl IterativeOptions { table_byte_size: 1_000_000, strategy: Replacement::TwoTier, null_window_search: true, + null_move_depth: None, aspiration_window: None, mtdf: false, step_increment: 1, @@ -167,6 +169,14 @@ impl IterativeOptions { self } + /// Whether to attempt to cut off early by seeing if each node is amazing + /// even after passing the turn to the opponent. Null move search explores + /// the tree at a depth reduced by this amount. + pub fn with_null_move_depth(mut self, depth_reduction: u8) -> Self { + self.null_move_depth = Some(depth_reduction); + self + } + /// Whether to search first in a narrow window around the previous root /// value on each iteration. pub fn with_aspiration_window(mut self, window: Evaluation) -> Self { @@ -321,6 +331,23 @@ where return Some(value); } + if let (Some(depth_reduction), Some(null_move)) = + (self.opts.null_move_depth, E::G::null_move(s)) + { + if depth >= depth_reduction { + // If we just pass and let the opponent play this position (at reduced depth), + null_move.apply(s); + let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; + null_move.undo(s); + // is the result still so good that we shouldn't bother with a full search? + if value >= beta { + // This value was at a fake depth, so don't assume too + // much about the lowerbound. + return Some(beta); + } + } + } + let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); self.total_generate_move_calls += 1; @@ -548,18 +575,18 @@ where // Timeout. 
break; } - } - if self.opts.verbose && !maxxed { - if let Some(entry) = self.negamaxer.table.lookup(root_hash) { - let end = Instant::now(); - let interval = end - interval_start; - eprintln!( - "Iterative aspiration search took {}ms; value{} bestmove={}", - interval.as_millis(), - entry.bounds(), - move_id::(&mut s_clone, entry.best_move) - ); - interval_start = end; + if self.opts.verbose && !maxxed { + if let Some(entry) = self.negamaxer.table.lookup(root_hash) { + let end = Instant::now(); + let interval = end - interval_start; + eprintln!( + "Iterative aspiration search took {}ms; value{} bestmove={}", + interval.as_millis(), + entry.bounds(), + move_id::(&mut s_clone, entry.best_move) + ); + interval_start = end; + } } } From 0f1504d94310315ed7a78ce607dee1bc502ea2a0 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 22 May 2022 11:01:34 -0700 Subject: [PATCH 084/162] Reorder top-level moves on every iteration. May be slightly more efficient when finding a better move, but most usefully adds initial randomization. 
--- src/strategies/iterative.rs | 27 ++++++++++++++++++++++++++- src/strategies/lazy_smp.rs | 8 +++++++- src/strategies/util.rs | 4 ++-- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 68ad550..9f46bb7 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -9,6 +9,7 @@ use super::super::util::*; use super::table::*; use super::util::*; +use rand::prelude::SliceRandom; use std::cmp::max; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -422,6 +423,24 @@ where self.negamax(s, depth, alpha, beta)?; Some(()) } + + pub(super) fn search_and_reorder( + &mut self, s: &mut ::S, moves: &mut [ValueMove<::M>], depth: u8, + ) -> Option { + let mut alpha = WORST_EVAL; + let beta = BEST_EVAL; + for value_move in moves.iter_mut() { + value_move.m.apply(s); + let value = -self.negamax(s, depth - 1, -beta, -alpha)?; + value_move.m.undo(s); + + alpha = max(alpha, value); + value_move.value = value; + } + moves.sort_by_key(|vm| -vm.value); + self.table.update(s.zobrist_hash(), alpha, beta, depth, moves[0].value, moves[0].m); + Some(moves[0].value) + } } pub struct IterativeSearch { @@ -551,6 +570,12 @@ where let mut best_move = None; let mut interval_start = start_time; let mut maxxed = false; + // Store the moves so they can be reordered every iteration. + let mut moves = Vec::new(); + E::G::generate_moves(&s_clone, &mut moves); + // Start in a random order. + moves.shuffle(&mut rand::thread_rng()); + let mut moves = moves.into_iter().map(|m| ValueMove::new(0, m)).collect::>(); // Start at 1 or 2 to hit the max depth. let mut depth = self.max_depth as u8 % self.opts.step_increment; @@ -590,7 +615,7 @@ where } } - self.negamaxer.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL) + self.negamaxer.search_and_reorder(&mut s_clone, &mut moves[..], depth) }; if search.is_none() { // Timeout. Return the best move from the previous depth. 
diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index f49e439..a5ccbaf 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -386,6 +386,12 @@ where let mut best_move = None; let mut interval_start = start_time; let mut maxxed = false; + // Store the moves so they can be reordered every iteration. + let mut moves = Vec::new(); + E::G::generate_moves(&s_clone, &mut moves); + // Start in a random order. + moves.shuffle(&mut rand::thread_rng()); + let mut moves = moves.into_iter().map(|m| ValueMove::new(0, m)).collect::>(); let mut depth = self.max_depth as u8 % self.opts.step_increment; if depth == 0 { @@ -430,7 +436,7 @@ where self.signal.new_search(&s, depth, WORST_EVAL, BEST_EVAL); - let value = self.negamaxer.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL); + let value = self.negamaxer.search_and_reorder(&mut s_clone, &mut moves, depth); if value.is_none() { // Timeout. Return the best move from the previous depth. break; diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 3ec03b2..8d9e822 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -81,8 +81,8 @@ where // This exists to be wrapped in a mutex, because it didn't work when I tried a tuple. pub(super) struct ValueMove { - value: Evaluation, - m: M, + pub(super) value: Evaluation, + pub(super) m: M, } impl ValueMove { From 069604fda28638cb0cca07fc3ad2a7ece81ab3f2 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 24 May 2022 10:22:48 -0700 Subject: [PATCH 085/162] Add verbosity to ParallelYbw. 
--- src/strategies/ybw.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index c43ea69..927c441 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -328,12 +328,18 @@ where let root_hash = s.zobrist_hash(); let mut s_clone = s.clone(); let mut best_move = None; + let mut interval_start = start_time; + let mut maxxed = false; let mut depth = self.max_depth as u8 % self.opts.step_increment; if depth == 0 { depth = self.opts.step_increment; } while depth <= self.max_depth as u8 { + if self.opts.verbose && !maxxed { + interval_start = Instant::now(); + eprintln!("Ybw search depth {}", depth); + } if self.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. break; @@ -341,6 +347,19 @@ where let entry = self.table.lookup(root_hash).unwrap(); best_move = entry.best_move; + if self.opts.verbose && !maxxed { + let interval = Instant::now() - interval_start; + eprintln!( + "Ybw search took {}ms; returned {:?} bestmove={}", + interval.as_millis(), + entry.value, + move_id::(&mut s_clone, best_move) + ); + if unclamp_value(entry.value).abs() == BEST_EVAL { + maxxed = true; + } + } + self.actual_depth = max(self.actual_depth, depth); self.nodes_explored.push(self.next_depth_nodes); self.prev_value = entry.value; @@ -349,6 +368,10 @@ where self.table.populate_pv(&mut self.pv, &mut s_clone, depth); } self.wall_time = start_time.elapsed(); + if self.opts.verbose { + let mut s_clone = s.clone(); + eprintln!("Principal variation: {}", pv_string::(&self.pv[..], &mut s_clone)); + } best_move } } From c76789282130cd532b781095e82fc580a06bee4d Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 24 May 2022 17:16:18 -0700 Subject: [PATCH 086/162] Add num_threads option to ybw. 
--- src/strategies/ybw.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 927c441..7b0304e 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -21,12 +21,13 @@ use std::time::{Duration, Instant}; /// Options to use for the parallel search engine. #[derive(Clone, Copy)] pub struct YbwOptions { + num_threads: Option, serial_cutoff_depth: u8, } impl YbwOptions { pub fn new() -> Self { - YbwOptions { serial_cutoff_depth: 1 } + YbwOptions { num_threads: None, serial_cutoff_depth: 1 } } } @@ -37,6 +38,12 @@ impl Default for YbwOptions { } impl YbwOptions { + /// Set the total number of threads to use. Otherwise defaults to num_cpus. + pub fn with_num_threads(mut self, num_threads: usize) -> Self { + self.num_threads = Some(num_threads); + self + } + /// At what depth should we stop trying to parallelize and just run serially. pub fn with_serial_cutoff_depth(mut self, depth: u8) -> Self { self.serial_cutoff_depth = depth; @@ -53,6 +60,8 @@ pub struct ParallelYbw { prev_value: Evaluation, eval: E, + thread_pool: rayon::ThreadPool, + opts: IterativeOptions, ybw_opts: YbwOptions, @@ -76,6 +85,8 @@ pub struct ParallelYbw { impl ParallelYbw { pub fn new(eval: E, opts: IterativeOptions, ybw_opts: YbwOptions) -> ParallelYbw { let table = LockfreeTable::new(opts.table_byte_size); + let num_threads = ybw_opts.num_threads.unwrap_or_else(num_cpus::get); + let pool_builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads); ParallelYbw { max_depth: 99, max_time: Duration::from_secs(5), @@ -83,6 +94,7 @@ impl ParallelYbw { table, //move_pool: MovePool::<_>::default(), prev_value: 0, + thread_pool: pool_builder.build().unwrap(), opts, ybw_opts, eval, @@ -340,7 +352,11 @@ where interval_start = Instant::now(); eprintln!("Ybw search depth {}", depth); } - if self.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL).is_none() { + if self + .thread_pool + .install(|| 
self.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL)) + .is_none() + { // Timeout. Return the best move from the previous depth. break; } From 724a9ff58571b060553c9f73a57e537a15ee3b6f Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 24 May 2022 21:19:25 -0700 Subject: [PATCH 087/162] Add background pondering to ybw. Since it already has a threadpool, we can just throw an infinite ponder onto it. I had to refactor iterative_search into its own method in its own object mostly just to have its own cancel signal. We don't wait for the background tasks to finish before starting the next search, as it probably doesn't speed up the cancellation at all, just starts filling idle threads with useful work faster. --- src/strategies/table.rs | 14 +- src/strategies/ybw.rs | 301 +++++++++++++++++++++++----------------- tests/strategies.rs | 4 +- 3 files changed, 184 insertions(+), 135 deletions(-) diff --git a/src/strategies/table.rs b/src/strategies/table.rs index ace4b3a..836eae3 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -34,6 +34,14 @@ pub(super) fn high_bits(hash: u64) -> u32 { } impl Entry { + pub(super) fn value_string(&self) -> String { + match unclamp_value(self.value) { + WORST_EVAL => "-∞".to_owned(), + BEST_EVAL => "∞".to_owned(), + value => value.to_string(), + } + } + pub(super) fn bounds(&self) -> String { match self.flag { EntryFlag::Exact => "=", @@ -41,11 +49,7 @@ impl Entry { EntryFlag::Lowerbound => "≥", } .to_string() - + &match unclamp_value(self.value) { - WORST_EVAL => "-∞".to_owned(), - BEST_EVAL => "∞".to_owned(), - value => value.to_string(), - } + + &self.value_string() } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 7b0304e..e73963c 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -23,11 +23,12 @@ use std::time::{Duration, Instant}; pub struct YbwOptions { num_threads: Option, serial_cutoff_depth: u8, + background_pondering: bool, } impl YbwOptions { pub fn new() -> Self 
{ - YbwOptions { num_threads: None, serial_cutoff_depth: 1 } + YbwOptions { num_threads: None, serial_cutoff_depth: 1, background_pondering: false } } } @@ -49,90 +50,34 @@ impl YbwOptions { self.serial_cutoff_depth = depth; self } + + /// Continuing processing during opponent's move. + pub fn with_background_pondering(mut self) -> Self { + self.background_pondering = true; + self + } } -pub struct ParallelYbw { - max_depth: usize, - max_time: Duration, - timeout: Arc, - table: LockfreeTable<<::G as Game>::M>, - //move_pool: MovePool<::M>, - prev_value: Evaluation, +struct ParallelNegamaxer { + table: Arc::M>>, eval: E, - - thread_pool: rayon::ThreadPool, - opts: IterativeOptions, ybw_opts: YbwOptions, - - // Runtime stats for the last move generated. - - // Maximum depth used to produce the move. - actual_depth: u8, - // Nodes explored at each depth. - nodes_explored: Vec, - // Nodes explored past this depth, and thus only useful for filling TT for - // next choose_move. - next_depth_nodes: u64, - // For computing the average branching factor. 
- total_generate_move_calls: u64, - total_generated_moves: u64, - table_hits: usize, - pv: Vec<::M>, - wall_time: Duration, + timeout: Arc, + // TODO: stats } -impl ParallelYbw { - pub fn new(eval: E, opts: IterativeOptions, ybw_opts: YbwOptions) -> ParallelYbw { - let table = LockfreeTable::new(opts.table_byte_size); - let num_threads = ybw_opts.num_threads.unwrap_or_else(num_cpus::get); - let pool_builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads); - ParallelYbw { - max_depth: 99, - max_time: Duration::from_secs(5), - timeout: Arc::new(AtomicBool::new(false)), - table, - //move_pool: MovePool::<_>::default(), - prev_value: 0, - thread_pool: pool_builder.build().unwrap(), - opts, - ybw_opts, - eval, - actual_depth: 0, - nodes_explored: Vec::new(), - next_depth_nodes: 0, - total_generate_move_calls: 0, - total_generated_moves: 0, - table_hits: 0, - pv: Vec::new(), - wall_time: Duration::default(), - } - } - - /// Set the maximum depth to search. Disables the timeout. - /// This can be changed between moves while reusing the transposition table. - pub fn set_max_depth(&mut self, depth: usize) { - self.max_depth = depth; - self.max_time = Duration::new(0, 0); - } - - /// Set the maximum time to compute the best move. When the timeout is - /// hit, it returns the best move found of the previous full - /// iteration. Unlimited max depth. - pub fn set_timeout(&mut self, max_time: Duration) { - self.max_time = max_time; - self.max_depth = 99; - } - - #[doc(hidden)] - pub fn root_value(&self) -> Evaluation { - unclamp_value(self.prev_value) - } - - /// Return what the engine considered to be the best sequence of moves - /// from both sides. - pub fn principal_variation(&self) -> &[::M] { - &self.pv[..] 
+impl ParallelNegamaxer +where + ::S: Clone + Zobrist + Send + Sync, + ::M: Copy + Eq + Send + Sync, + E: Clone + Sync + Send + 'static, +{ + fn new( + opts: IterativeOptions, ybw_opts: YbwOptions, eval: E, + table: Arc::M>>, timeout: Arc, + ) -> Self { + Self { table, eval, opts, ybw_opts, timeout } } // Negamax only among noisy moves. @@ -312,82 +257,182 @@ impl ParallelYbw { //self.move_pool.free(moves); Some(clamp_value(best)) } -} -impl Strategy for ParallelYbw -where - ::S: Clone + Zobrist + Send + Sync, - ::M: Copy + Eq + Send + Sync, - E: Sync, -{ - fn choose_move(&mut self, s: &::S) -> Option<::M> { - self.table.advance_generation(); - // Reset stats. - self.nodes_explored.clear(); - self.next_depth_nodes = 0; - self.total_generate_move_calls = 0; - self.total_generated_moves = 0; - self.actual_depth = 0; - self.table_hits = 0; - let start_time = Instant::now(); - // Start timer if configured. - self.timeout = if self.max_time == Duration::new(0, 0) { - Arc::new(AtomicBool::new(false)) - } else { - timeout_signal(self.max_time) - }; - - let root_hash = s.zobrist_hash(); - let mut s_clone = s.clone(); + fn iterative_search( + &self, mut state: ::S, max_depth: u8, background: bool, + ) -> Option<(::M, Evaluation)> { + self.table.concurrent_advance_generation(); + let root_hash = state.zobrist_hash(); let mut best_move = None; - let mut interval_start = start_time; + let mut best_value = 0; + let mut interval_start = Instant::now(); let mut maxxed = false; + let mut pv = String::new(); - let mut depth = self.max_depth as u8 % self.opts.step_increment; + let mut depth = max_depth % self.opts.step_increment; if depth == 0 { depth = self.opts.step_increment; } - while depth <= self.max_depth as u8 { - if self.opts.verbose && !maxxed { + while depth <= max_depth as u8 { + if self.opts.verbose && !background && !maxxed { interval_start = Instant::now(); - eprintln!("Ybw search depth {}", depth); + eprint!("Ybw search depth{:>2}", depth); } - if self - .thread_pool 
- .install(|| self.negamax(&mut s_clone, depth, WORST_EVAL, BEST_EVAL)) - .is_none() - { + if self.negamax(&mut state, depth, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. + if self.opts.verbose && !background && !maxxed { + eprintln!(" timed out"); + } break; } let entry = self.table.lookup(root_hash).unwrap(); best_move = entry.best_move; + best_value = entry.value; - if self.opts.verbose && !maxxed { + if self.opts.verbose && !background && !maxxed { let interval = Instant::now() - interval_start; eprintln!( - "Ybw search took {}ms; returned {:?} bestmove={}", + " took{:>5}ms; returned{:>5}; bestmove {}", interval.as_millis(), - entry.value, - move_id::(&mut s_clone, best_move) + entry.value_string(), + move_id::(&mut state, best_move) ); if unclamp_value(entry.value).abs() == BEST_EVAL { maxxed = true; } } - self.actual_depth = max(self.actual_depth, depth); - self.nodes_explored.push(self.next_depth_nodes); - self.prev_value = entry.value; - self.next_depth_nodes = 0; depth += self.opts.step_increment; - self.table.populate_pv(&mut self.pv, &mut s_clone, depth); + let mut pv_moves = Vec::new(); + self.table.populate_pv(&mut pv_moves, &mut state, depth); + pv = pv_string::(&pv_moves[..], &mut state); } - self.wall_time = start_time.elapsed(); - if self.opts.verbose { - let mut s_clone = s.clone(); - eprintln!("Principal variation: {}", pv_string::(&self.pv[..], &mut s_clone)); + if self.opts.verbose && !background { + eprintln!("Principal variation: {}", pv); } - best_move + best_move.map(|m| (m, best_value)) + } +} + +pub struct ParallelYbw { + max_depth: u8, + max_time: Duration, + + background_cancel: Arc, + table: Arc::M>>, + //move_pool: MovePool<::M>, + prev_value: Evaluation, + eval: E, + + thread_pool: rayon::ThreadPool, + + opts: IterativeOptions, + ybw_opts: YbwOptions, +} + +impl ParallelYbw { + pub fn new(eval: E, opts: IterativeOptions, ybw_opts: YbwOptions) -> ParallelYbw { + let table = 
Arc::new(LockfreeTable::new(opts.table_byte_size)); + let num_threads = ybw_opts.num_threads.unwrap_or_else(num_cpus::get); + let pool_builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads); + ParallelYbw { + max_depth: 99, + max_time: Duration::from_secs(5), + background_cancel: Arc::new(AtomicBool::new(false)), + table, + //move_pool: MovePool::<_>::default(), + prev_value: 0, + thread_pool: pool_builder.build().unwrap(), + opts, + ybw_opts, + eval, + } + } + + /// Set the maximum depth to search. Disables the timeout. + /// This can be changed between moves while reusing the transposition table. + pub fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + /// Set the maximum time to compute the best move. When the timeout is + /// hit, it returns the best move found of the previous full + /// iteration. Unlimited max depth. + pub fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 99; + } + + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + // Unimplemented, just use verbose mode. + pub fn principal_variation(&self) -> &[::M] { + &[] + } +} + +impl Strategy for ParallelYbw +where + ::S: Clone + Zobrist + Send + Sync, + ::M: Copy + Eq + Send + Sync, + E: Clone + Sync + Send + 'static, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + // Cancel any ongoing background processing. + self.background_cancel.store(true, Ordering::SeqCst); + // Start timer if configured. + let timeout = if self.max_time == Duration::new(0, 0) { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(self.max_time) + }; + + let best_value_move = { + let negamaxer = ParallelNegamaxer::new( + self.opts.clone(), + self.ybw_opts.clone(), + self.eval.clone(), + self.table.clone(), + timeout.clone(), + ); + // Launch in threadpool and wait for result. 
+ self.thread_pool + .install(|| negamaxer.iterative_search(s.clone(), self.max_depth, false)) + }; + if let Some((best_move, value)) = best_value_move { + self.prev_value = value; + + if self.ybw_opts.background_pondering { + self.background_cancel = Arc::new(AtomicBool::new(false)); + // Create a separate negamaxer to have a dedicated cancel + // signal, and to allow the negamaxer to outlive this scope. + let negamaxer = ParallelNegamaxer::new( + self.opts.clone(), + self.ybw_opts.clone(), + self.eval.clone(), + self.table.clone(), + self.background_cancel.clone(), + ); + let mut state = s.clone(); + best_move.apply(&mut state); + // Launch in threadpool asynchronously. + self.thread_pool.spawn(move || { + negamaxer.iterative_search(state, 99, true); + }); + } + Some(best_move) + } else { + None + } + } +} + +impl Drop for ParallelYbw { + fn drop(&mut self) { + self.background_cancel.store(true, Ordering::SeqCst); } } diff --git a/tests/strategies.rs b/tests/strategies.rs index dff97b4..78ae812 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -179,7 +179,7 @@ fn compare_plain_negamax() { let opt = IterativeOptions::new().with_table_byte_size(64000); let mut ybw = ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); - ybw.set_max_depth(max_depth); + ybw.set_max_depth(max_depth as u8); let ybw_move = ybw.choose_move(&b).unwrap(); let ybw_value = ybw.root_value(); assert_eq!(value, ybw_value, "search depth={}\n{}", max_depth, b); @@ -236,7 +236,7 @@ fn compare_deep_negamax() { let mut parallel = ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); - parallel.set_max_depth(max_depth); + parallel.set_max_depth(max_depth as u8); parallel.choose_move(&b).unwrap(); let parallel_value = parallel.root_value(); assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); From c432090b2230e1b9f1e0e0efd560150659020a49 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 24 May 2022 22:10:02 -0700 
Subject: [PATCH 088/162] Fix crash in background threads. --- src/strategies/ybw.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index e73963c..995831b 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -285,7 +285,18 @@ where } break; } - let entry = self.table.lookup(root_hash).unwrap(); + let entry = match self.table.lookup(root_hash) { + Some(entry) => entry, + None => { + if background { + // Main tasks overwrote our result, just bail early. + return None; + } else { + panic!("Probably some race condition ate the best entry."); + } + } + }; + best_move = entry.best_move; best_value = entry.value; From 07c847eb13e9717b408aa8bf3cc1631fb5a4aef1 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 24 May 2022 22:16:11 -0700 Subject: [PATCH 089/162] Copy null-move-depth into ybw. --- src/strategies/ybw.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 995831b..a627ee5 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -152,6 +152,23 @@ where return Some(value); } + if let (Some(depth_reduction), Some(null_move)) = + (self.opts.null_move_depth, E::G::null_move(s)) + { + if depth >= depth_reduction { + // If we just pass and let the opponent play this position (at reduced depth), + null_move.apply(s); + let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; + null_move.undo(s); + // is the result still so good that we shouldn't bother with a full search? + if value >= beta { + // This value was at a fake depth, so don't assume too + // much about the lowerbound. 
+ return Some(beta); + } + } + } + //let mut moves = self.move_pool.alloc(); let mut moves = Vec::new(); E::G::generate_moves(s, &mut moves); From 9d712e5c16ea1ecdd633fa48c3960a7d5eecc7dd Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 25 May 2022 09:54:00 -0700 Subject: [PATCH 090/162] Standardize all APIs to use u8 for depth. If someone did pass a value > u8::MAX, it would get truncated internally in a few places, so just don't allow that in the start of the API. --- src/strategies/iterative.rs | 4 ++-- src/strategies/lazy_smp.rs | 4 ++-- src/strategies/negamax.rs | 6 +++--- src/util.rs | 6 ++---- tests/strategies.rs | 12 ++++++------ 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 9f46bb7..b9a07ed 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -444,7 +444,7 @@ where } pub struct IterativeSearch { - max_depth: usize, + max_depth: u8, max_time: Duration, negamaxer: Negamaxer::M>>, prev_value: Evaluation, @@ -483,7 +483,7 @@ where /// Set the maximum depth to search. Disables the timeout. /// This can be changed between moves while reusing the transposition table. - pub fn set_max_depth(&mut self, depth: usize) { + pub fn set_max_depth(&mut self, depth: u8) { self.max_depth = depth; self.max_time = Duration::new(0, 0); } diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index a5ccbaf..48f5994 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -247,7 +247,7 @@ where ::S: Clone + Zobrist, ::M: Copy + Eq, { - max_depth: usize, + max_depth: u8, max_time: Duration, table: Arc::M>>, negamaxer: Negamaxer::M>>>, @@ -316,7 +316,7 @@ where /// Set the maximum depth to search. Disables the timeout. /// This can be changed between moves while reusing the transposition table. 
- pub fn set_max_depth(&mut self, depth: usize) { + pub fn set_max_depth(&mut self, depth: u8) { self.max_depth = depth; self.max_time = Duration::new(0, 0); } diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 7ebe964..fa520cd 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -10,7 +10,7 @@ use rand::seq::SliceRandom; use std::cmp::max; pub struct Negamax { - max_depth: usize, + max_depth: u8, move_pool: MovePool<::M>, rng: rand::rngs::ThreadRng, prev_value: Evaluation, @@ -18,7 +18,7 @@ pub struct Negamax { } impl Negamax { - pub fn new(eval: E, depth: usize) -> Negamax { + pub fn new(eval: E, depth: u8) -> Negamax { Negamax { max_depth: depth, move_pool: MovePool::<_>::default(), @@ -34,7 +34,7 @@ impl Negamax { } fn negamax( - &mut self, s: &mut ::S, depth: usize, mut alpha: Evaluation, beta: Evaluation, + &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, ) -> Evaluation where <::G as Game>::M: Copy, diff --git a/src/util.rs b/src/util.rs index 20a69fe..75a92a0 100644 --- a/src/util.rs +++ b/src/util.rs @@ -60,7 +60,7 @@ impl MovePool { } fn perft_recurse( - pool: &mut MovePool, state: &mut G::S, depth: usize, single_thread_cutoff: usize, + pool: &mut MovePool, state: &mut G::S, depth: u8, single_thread_cutoff: u8, ) -> u64 where ::S: Clone + Sync, @@ -103,9 +103,7 @@ where n } -pub fn perft( - state: &mut ::S, max_depth: usize, multi_threaded: bool, -) -> Vec +pub fn perft(state: &mut ::S, max_depth: u8, multi_threaded: bool) -> Vec where ::S: Clone + Sync, ::M: Copy + Sync, diff --git a/tests/strategies.rs b/tests/strategies.rs index 78ae812..d847a29 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -16,7 +16,7 @@ use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; pub struct PlainNegamax { - depth: usize, + depth: u8, root_value: Evaluation, // All moves tied with the best valuation. 
best_moves: Vec<::M>, @@ -24,11 +24,11 @@ pub struct PlainNegamax { } impl PlainNegamax { - pub fn new(eval: E, depth: usize) -> PlainNegamax { + pub fn new(eval: E, depth: u8) -> PlainNegamax { PlainNegamax { depth: depth, root_value: 0, best_moves: Vec::new(), eval } } - fn negamax(&self, s: &mut ::S, depth: usize) -> Evaluation + fn negamax(&self, s: &mut ::S, depth: u8) -> Evaluation where <::G as Game>::M: Copy, { @@ -102,7 +102,7 @@ impl minimax::Evaluator for RandomEvaluator { } } -fn generate_random_state(depth: usize) -> connect4::Board { +fn generate_random_state(depth: u8) -> connect4::Board { let mut rng = rand::thread_rng(); let mut b = connect4::Board::default(); for _ in 0..depth { @@ -179,7 +179,7 @@ fn compare_plain_negamax() { let opt = IterativeOptions::new().with_table_byte_size(64000); let mut ybw = ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); - ybw.set_max_depth(max_depth as u8); + ybw.set_max_depth(max_depth); let ybw_move = ybw.choose_move(&b).unwrap(); let ybw_value = ybw.root_value(); assert_eq!(value, ybw_value, "search depth={}\n{}", max_depth, b); @@ -236,7 +236,7 @@ fn compare_deep_negamax() { let mut parallel = ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); - parallel.set_max_depth(max_depth as u8); + parallel.set_max_depth(max_depth); parallel.choose_move(&b).unwrap(); let parallel_value = parallel.root_value(); assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); From 096439890c8e354f8dfc011e7dc320c7c4224995 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 25 May 2022 09:57:55 -0700 Subject: [PATCH 091/162] num_threads should be usize everywhere --- src/strategies/mcts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 01c4afb..2c1dfcf 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -127,8 +127,8 @@ impl MCTSOptions { } /// How many threads to run. 
Defaults to num_cpus. - pub fn with_num_threads(mut self, threads: u32) -> Self { - self.num_threads = Some(threads as usize); + pub fn with_num_threads(mut self, threads: usize) -> Self { + self.num_threads = Some(threads); self } } From 1b8bfeac74d7b1c6808ce9f3aeec06c6b7cd1175 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 25 May 2022 11:25:21 -0700 Subject: [PATCH 092/162] Refactor noisy moves to be part of Evaluator instead of Game. They are not an intrinsic part of the rules, but a design choice by the engine. --- src/interface.rs | 29 +++++++++++++++-------------- src/strategies/iterative.rs | 21 +++++++++++++-------- src/strategies/ybw.rs | 25 ++++++++++++++----------- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index 8a1d22b..f3b65a7 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -22,6 +22,21 @@ pub trait Evaluator { /// move next. fn evaluate(&self, s: &::S) -> Evaluation; + /// Optional interface to support strategies using quiescence search. + /// + /// A "noisy" move is a threatening move that requires a response. + /// + /// The term comes from chess, where capturing a piece is considered a noisy + /// move. Capturing a piece is often the first move out of an exchange of + /// captures. Evaluating the board state after only the first capture can + /// give a misleadingly high score. The solution is to continue the search + /// among only noisy moves and find the score once the board state settles. + fn is_noisy_move(&self, _state: &::S, _move: ::M) -> bool { + // When unimplemented, there are no noisy moves and search terminates + // immediately. + false + } + /// After generating moves, reorder them to explore the most promising first. /// The default implementation evaluates all thes game states and sorts highest Evaluation first. fn reorder_moves(&self, s: &mut ::S, moves: &mut [::M]) @@ -112,20 +127,6 @@ pub trait Game: Sized { /// Generate moves at the given state. 
fn generate_moves(state: &Self::S, moves: &mut Vec); - /// Optional interface to support strategies using quiescence search. - /// - /// A "noisy" move is a threatening move that requires a response. - /// - /// The term comes from chess, where capturing a piece is considered a noisy - /// move. Capturing a piece is often the first move out of an exchange of - /// captures. Evaluating the board state after only the first capture can - /// give a misleadingly high score. The solution is to continue the search - /// among only noisy moves and find the score once the board state settles. - fn generate_noisy_moves(_state: &Self::S, _moves: &mut Vec) { - // When unimplemented, there are no noisy moves and search terminates - // immediately. - } - /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if /// the state is non-terminal. diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index b9a07ed..1018b21 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -202,7 +202,7 @@ impl IterativeOptions { /// Enable [quiescence /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves - /// of the search tree. The Game must implement `generate_noisy_moves` + /// of the search tree. The Evaluator must implement `is_noisy_move` /// for the search to know when the state has become "quiet". pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { self.max_quiescence_depth = depth; @@ -283,15 +283,15 @@ where } let mut moves = self.move_pool.alloc(); - E::G::generate_noisy_moves(s, &mut moves); - if moves.is_empty() { - // Only quiet moves remain, return leaf evaluation. 
- self.move_pool.free(moves); - return Some(self.eval.evaluate(s)); - } + E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; + let mut any_noisy = false; for m in moves.iter() { + if !self.eval.is_noisy_move(s, *m) { + continue; + } + any_noisy = true; m.apply(s); let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; m.undo(s); @@ -302,7 +302,12 @@ where } } self.move_pool.free(moves); - Some(best) + Some(if !any_noisy { + // Only quiet moves remain, return leaf evaluation. + self.eval.evaluate(s) + } else { + best + }) } // Recursively compute negamax on the game state. Returns None if it hits the timeout. diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index a627ee5..203ec8b 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -80,13 +80,11 @@ where Self { table, eval, opts, ybw_opts, timeout } } + // Negamax only among noisy moves. // Negamax only among noisy moves. fn noisy_negamax( &self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, - ) -> Option - where - ::M: Copy, - { + ) -> Option { if self.timeout.load(Ordering::Relaxed) { return None; } @@ -99,15 +97,15 @@ where //let mut moves = self.move_pool.alloc(); let mut moves = Vec::new(); - E::G::generate_noisy_moves(s, &mut moves); - if moves.is_empty() { - // Only quiet moves remain, return leaf evaluation. - //self.move_pool.free(moves); - return Some(self.eval.evaluate(s)); - } + E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; + let mut any_noisy = false; for m in moves.iter() { + if !self.eval.is_noisy_move(s, *m) { + continue; + } + any_noisy = true; m.apply(s); let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; m.undo(s); @@ -118,7 +116,12 @@ where } } //self.move_pool.free(moves); - Some(best) + Some(if !any_noisy { + // Only quiet moves remain, return leaf evaluation. + self.eval.evaluate(s) + } else { + best + }) } // Recursively compute negamax on the game state. Returns None if it hits the timeout. 
From 4581f76b9f8d3425a15c609d9c30f87741c7556d Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 26 May 2022 17:51:08 -0700 Subject: [PATCH 093/162] Revert noisy moves to be a generation instead of a bool. Keep it in Evaluator though. --- src/interface.rs | 8 ++++++-- src/strategies/iterative.rs | 20 +++++++------------- src/strategies/ybw.rs | 19 ++++++------------- 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index f3b65a7..9b88dfc 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -31,10 +31,14 @@ pub trait Evaluator { /// captures. Evaluating the board state after only the first capture can /// give a misleadingly high score. The solution is to continue the search /// among only noisy moves and find the score once the board state settles. - fn is_noisy_move(&self, _state: &::S, _move: ::M) -> bool { + /// + /// Noisy moves are not inherent parts of the rules, but engine decisions, + /// so they are implemented in Evaluator instead of Game. + fn generate_noisy_moves( + &self, _state: &::S, _moves: &mut Vec<::M>, + ) { // When unimplemented, there are no noisy moves and search terminates // immediately. - false } /// After generating moves, reorder them to explore the most promising first. diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 1018b21..bce41a4 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -202,7 +202,7 @@ impl IterativeOptions { /// Enable [quiescence /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves - /// of the search tree. The Evaluator must implement `is_noisy_move` + /// of the search tree. The Evaluator must implement `generate_noisy_moves` /// for the search to know when the state has become "quiet". 
pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { self.max_quiescence_depth = depth; @@ -283,15 +283,14 @@ where } let mut moves = self.move_pool.alloc(); - E::G::generate_moves(s, &mut moves); + self.eval.generate_noisy_moves(s, &mut moves); + if moves.is_empty() { + self.move_pool.free(moves); + return Some(self.eval.evaluate(s)); + } let mut best = WORST_EVAL; - let mut any_noisy = false; for m in moves.iter() { - if !self.eval.is_noisy_move(s, *m) { - continue; - } - any_noisy = true; m.apply(s); let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; m.undo(s); @@ -302,12 +301,7 @@ where } } self.move_pool.free(moves); - Some(if !any_noisy { - // Only quiet moves remain, return leaf evaluation. - self.eval.evaluate(s) - } else { - best - }) + Some(best) } // Recursively compute negamax on the game state. Returns None if it hits the timeout. diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 203ec8b..7a823a8 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -80,7 +80,6 @@ where Self { table, eval, opts, ybw_opts, timeout } } - // Negamax only among noisy moves. // Negamax only among noisy moves. fn noisy_negamax( &self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, @@ -97,15 +96,14 @@ where //let mut moves = self.move_pool.alloc(); let mut moves = Vec::new(); - E::G::generate_moves(s, &mut moves); + self.eval.generate_noisy_moves(s, &mut moves); + if moves.is_empty() { + //self.move_pool.free(moves); + return Some(self.eval.evaluate(s)); + } let mut best = WORST_EVAL; - let mut any_noisy = false; for m in moves.iter() { - if !self.eval.is_noisy_move(s, *m) { - continue; - } - any_noisy = true; m.apply(s); let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; m.undo(s); @@ -116,12 +114,7 @@ where } } //self.move_pool.free(moves); - Some(if !any_noisy { - // Only quiet moves remain, return leaf evaluation. 
- self.eval.evaluate(s) - } else { - best - }) + Some(best) } // Recursively compute negamax on the game state. Returns None if it hits the timeout. From 32ba31d0d1941fa0b3e667baaacc1fb8d7ad61ac Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 7 Jun 2022 17:04:09 -0700 Subject: [PATCH 094/162] Copy the YBW verbosity style to the other iterative strategies. --- src/strategies/iterative.rs | 15 +++++++-------- src/strategies/lazy_smp.rs | 26 +++++++++++++++++--------- src/strategies/ybw.rs | 20 ++++++++++---------- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index bce41a4..c7e1072 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -567,7 +567,7 @@ where let root_hash = s.zobrist_hash(); let mut s_clone = s.clone(); let mut best_move = None; - let mut interval_start = start_time; + let mut interval_start; let mut maxxed = false; // Store the moves so they can be reordered every iteration. 
let mut moves = Vec::new(); @@ -582,10 +582,7 @@ where depth = self.opts.step_increment; } while depth <= self.max_depth as u8 { - if self.opts.verbose && !maxxed { - interval_start = Instant::now(); - eprintln!("Iterative search depth {}", depth); - } + interval_start = Instant::now(); let search = if self.opts.mtdf { self.mtdf(&mut s_clone, depth, self.prev_value) } else { @@ -604,7 +601,8 @@ where let end = Instant::now(); let interval = end - interval_start; eprintln!( - "Iterative aspiration search took {}ms; value{} bestmove={}", + "Iterative aspiration depth{:>2} took{:>5}ms; bounds{:>5} bestmove={}", + depth, interval.as_millis(), entry.bounds(), move_id::(&mut s_clone, entry.best_move) @@ -626,9 +624,10 @@ where if self.opts.verbose && !maxxed { let interval = Instant::now() - interval_start; eprintln!( - "Iterative full search took {}ms; returned {:?} bestmove={}", + "Iterative fullsearch depth{:>2} took{:>5}ms; value{:>6} bestmove={}", + depth, interval.as_millis(), - entry.value, + entry.value_string(), move_id::(&mut s_clone, best_move) ); if unclamp_value(entry.value).abs() == BEST_EVAL { diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 48f5994..2057f34 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -54,6 +54,10 @@ impl LazySmpOptions { self.differing_depths = true; self } + + fn num_threads(self) -> usize { + self.num_threads.unwrap_or_else(num_cpus::get) + } } #[derive(Clone)] @@ -254,6 +258,7 @@ where signal: Arc::S>>, opts: IterativeOptions, + num_threads: usize, // Runtime stats for the last move generated. 
prev_value: Evaluation, @@ -279,8 +284,9 @@ where let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); let stats = Arc::new(SharedStats::new()); let signal = Arc::new(CommandSignal::new()); + let num_threads = smp_opts.num_threads(); // start n-1 helper threads - for iter in 1..smp_opts.num_threads.unwrap_or_else(num_cpus::get) { + for iter in 1..num_threads { let table2 = table.clone(); let eval2 = eval.clone(); let opts2 = opts.clone(); @@ -306,6 +312,7 @@ where signal, prev_value: 0, opts, + num_threads, actual_depth: 0, nodes_explored: Vec::new(), shared_stats: stats, @@ -384,7 +391,7 @@ where let root_hash = s.zobrist_hash(); let mut s_clone = s.clone(); let mut best_move = None; - let mut interval_start = start_time; + let mut interval_start; let mut maxxed = false; // Store the moves so they can be reordered every iteration. let mut moves = Vec::new(); @@ -398,10 +405,7 @@ where depth = self.opts.step_increment; } while depth <= self.max_depth as u8 { - if self.opts.verbose && !maxxed { - interval_start = Instant::now(); - eprintln!("LazySmp search depth {}", depth); - } + interval_start = Instant::now(); if let Some(window) = self.opts.aspiration_window { // First, parallel aspiration search to at least establish some bounds. 
let mut alpha = self.prev_value.saturating_sub(window); @@ -424,7 +428,9 @@ where let end = Instant::now(); let interval = end - interval_start; eprintln!( - "LazySmp aspiration search took {}ms; value {} bestmove={}", + "LazySmp (threads={}) aspiration depth{:>2} took{:>5}ms; bounds{:>5} bestmove={}", + self.num_threads, + depth, interval.as_millis(), entry.bounds(), move_id::(&mut s_clone, entry.best_move) @@ -448,9 +454,11 @@ where if self.opts.verbose && !maxxed { let interval = Instant::now() - interval_start; eprintln!( - "LazySmp full search took {}ms; returned {:?} bestmove={}", + "LazySmp (threads={}) fullsearch depth{:>2} took{:>5}ms; value{:>6} bestmove={}", + self.num_threads, + depth, interval.as_millis(), - value.unwrap(), + entry.value_string(), move_id::(&mut s_clone, entry.best_move) ); if unclamp_value(value.unwrap()).abs() == BEST_EVAL { diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 7a823a8..3c58789 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -56,6 +56,10 @@ impl YbwOptions { self.background_pondering = true; self } + + fn num_threads(self) -> usize { + self.num_threads.unwrap_or_else(num_cpus::get) + } } struct ParallelNegamaxer { @@ -278,7 +282,7 @@ where let root_hash = state.zobrist_hash(); let mut best_move = None; let mut best_value = 0; - let mut interval_start = Instant::now(); + let mut interval_start; let mut maxxed = false; let mut pv = String::new(); @@ -287,15 +291,9 @@ where depth = self.opts.step_increment; } while depth <= max_depth as u8 { - if self.opts.verbose && !background && !maxxed { - interval_start = Instant::now(); - eprint!("Ybw search depth{:>2}", depth); - } + interval_start = Instant::now(); if self.negamax(&mut state, depth, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. 
- if self.opts.verbose && !background && !maxxed { - eprintln!(" timed out"); - } break; } let entry = match self.table.lookup(root_hash) { @@ -316,7 +314,9 @@ where if self.opts.verbose && !background && !maxxed { let interval = Instant::now() - interval_start; eprintln!( - " took{:>5}ms; returned{:>5}; bestmove {}", + "Ybw search (threads={}) depth{:>2} took{:>5}ms; returned{:>5}; bestmove {}", + self.ybw_opts.num_threads(), + depth, interval.as_millis(), entry.value_string(), move_id::(&mut state, best_move) @@ -357,7 +357,7 @@ pub struct ParallelYbw { impl ParallelYbw { pub fn new(eval: E, opts: IterativeOptions, ybw_opts: YbwOptions) -> ParallelYbw { let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); - let num_threads = ybw_opts.num_threads.unwrap_or_else(num_cpus::get); + let num_threads = ybw_opts.num_threads(); let pool_builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads); ParallelYbw { max_depth: 99, From 0ba09fae85011d9833728cc438be78fe937b3fb3 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 7 Jun 2022 20:23:02 -0700 Subject: [PATCH 095/162] Add timeout, max_depth, and principal_variation to the Strategy trait. --- src/interface.rs | 14 ++++++++++++++ src/strategies/iterative.rs | 35 ++++++++++++++--------------------- src/strategies/lazy_smp.rs | 35 ++++++++++++++--------------------- src/strategies/mcts.rs | 17 +++++++++++------ src/strategies/negamax.rs | 4 ++++ src/strategies/ybw.rs | 30 ++++++++++-------------------- 6 files changed, 67 insertions(+), 68 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index 9b88dfc..047210d 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -148,4 +148,18 @@ pub trait Game: Sized { /// Defines a method of choosing a move for the current player. pub trait Strategy { fn choose_move(&mut self, state: &G::S) -> Option; + + /// For strategies that can ponder indefinitely, set the timeout. + /// This can be changed between calls to choose_move. 
+ fn set_timeout(&mut self, _timeout: std::time::Duration) {} + + /// Set the maximum depth to evaluate (instead of the timeout). + /// This can be changed between calls to choose_move. + fn set_max_depth(&mut self, _depth: u8) {} + + /// From the last choose_move call, return the principal variation, + /// i.e. the best sequence of moves for both players. + fn principal_variation(&self) -> Vec { + Vec::new() + } } diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index c7e1072..5792e4b 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -480,21 +480,6 @@ where } } - /// Set the maximum depth to search. Disables the timeout. - /// This can be changed between moves while reusing the transposition table. - pub fn set_max_depth(&mut self, depth: u8) { - self.max_depth = depth; - self.max_time = Duration::new(0, 0); - } - - /// Set the maximum time to compute the best move. When the timeout is - /// hit, it returns the best move found of the previous full - /// iteration. Unlimited max depth. - pub fn set_timeout(&mut self, max_time: Duration) { - self.max_time = max_time; - self.max_depth = 99; - } - /// Return a human-readable summary of the last move generation. pub fn stats(&self, s: &mut ::S) -> String { let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); @@ -515,12 +500,6 @@ where unclamp_value(self.prev_value) } - /// Return what the engine considered to be the best sequence of moves - /// from both sides. - pub fn principal_variation(&self) -> &[::M] { - &self.pv[..] 
- } - fn mtdf( &mut self, s: &mut ::S, depth: u8, mut guess: Evaluation, ) -> Option { @@ -649,4 +628,18 @@ where } best_move } + + fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 99; + } + + fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + fn principal_variation(&self) -> Vec<::M> { + self.pv.clone() + } } diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 2057f34..29c1a03 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -321,31 +321,10 @@ where } } - /// Set the maximum depth to search. Disables the timeout. - /// This can be changed between moves while reusing the transposition table. - pub fn set_max_depth(&mut self, depth: u8) { - self.max_depth = depth; - self.max_time = Duration::new(0, 0); - } - - /// Set the maximum time to compute the best move. When the timeout is - /// hit, it returns the best move found of the previous full - /// iteration. Unlimited max depth. - pub fn set_timeout(&mut self, max_time: Duration) { - self.max_time = max_time; - self.max_depth = 99; - } - #[doc(hidden)] pub fn root_value(&self) -> Evaluation { unclamp_value(self.prev_value) } - - /// Return what the engine considered to be the best sequence of moves - /// from both sides. - pub fn principal_variation(&self) -> &[::M] { - &self.pv[..] 
- } } impl LazySmp @@ -481,4 +460,18 @@ where } best_move } + + fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 99; + } + + fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + fn principal_variation(&self) -> Vec<::M> { + self.pv.clone() + } } diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 2c1dfcf..3d3c7df 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -154,12 +154,6 @@ impl MonteCarloTreeSearch { } } - /// Set the time limit per move. - pub fn set_timeout(&mut self, timeout: Duration) { - self.max_rollouts = 0; - self.max_time = timeout; - } - /// Instead of a timeout, run this many rollouts to choose a move. pub fn set_max_rollouts(&mut self, rollouts: u32) { self.max_time = Duration::default(); @@ -300,4 +294,15 @@ where let exploration = 0.0; // Just get best node. root.best_child(exploration).map(|node| node.m.unwrap()) } + + fn set_timeout(&mut self, timeout: Duration) { + self.max_rollouts = 0; + self.max_time = timeout; + } + + fn set_max_depth(&mut self, depth: u8) { + // Set some arbitrary function of rollouts. + self.max_time = Duration::default(); + self.max_rollouts = depth as u32 * 100; + } } diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index fa520cd..8b8815e 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -96,4 +96,8 @@ where self.prev_value = best; Some(best_move) } + + fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; + } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 3c58789..1172ad1 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -373,30 +373,10 @@ impl ParallelYbw { } } - /// Set the maximum depth to search. Disables the timeout. - /// This can be changed between moves while reusing the transposition table. 
- pub fn set_max_depth(&mut self, depth: u8) { - self.max_depth = depth; - self.max_time = Duration::new(0, 0); - } - - /// Set the maximum time to compute the best move. When the timeout is - /// hit, it returns the best move found of the previous full - /// iteration. Unlimited max depth. - pub fn set_timeout(&mut self, max_time: Duration) { - self.max_time = max_time; - self.max_depth = 99; - } - #[doc(hidden)] pub fn root_value(&self) -> Evaluation { unclamp_value(self.prev_value) } - - // Unimplemented, just use verbose mode. - pub fn principal_variation(&self) -> &[::M] { - &[] - } } impl Strategy for ParallelYbw @@ -453,6 +433,16 @@ where None } } + + fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 99; + } + + fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } } impl Drop for ParallelYbw { From e1fce7edd02957077510645946f98ae4349df164 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 7 Jun 2022 21:16:20 -0700 Subject: [PATCH 096/162] Return principal variation from YBW --- src/strategies/ybw.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 1172ad1..35e568a 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -69,6 +69,7 @@ struct ParallelNegamaxer { ybw_opts: YbwOptions, timeout: Arc, // TODO: stats + pv: Mutex::M>>, } impl ParallelNegamaxer @@ -81,7 +82,11 @@ where opts: IterativeOptions, ybw_opts: YbwOptions, eval: E, table: Arc::M>>, timeout: Arc, ) -> Self { - Self { table, eval, opts, ybw_opts, timeout } + Self { table, eval, opts, ybw_opts, timeout, pv: Mutex::new(Vec::new()) } + } + + fn principal_variation(&self) -> Vec<::M> { + self.pv.lock().unwrap().clone() } // Negamax only among noisy moves. 
@@ -329,6 +334,7 @@ where depth += self.opts.step_increment; let mut pv_moves = Vec::new(); self.table.populate_pv(&mut pv_moves, &mut state, depth); + self.pv.lock().unwrap().clone_from(&pv_moves); pv = pv_string::(&pv_moves[..], &mut state); } if self.opts.verbose && !background { @@ -346,6 +352,7 @@ pub struct ParallelYbw { table: Arc::M>>, //move_pool: MovePool<::M>, prev_value: Evaluation, + principal_variation: Vec<::M>, eval: E, thread_pool: rayon::ThreadPool, @@ -366,6 +373,7 @@ impl ParallelYbw { table, //move_pool: MovePool::<_>::default(), prev_value: 0, + principal_variation: Vec::new(), thread_pool: pool_builder.build().unwrap(), opts, ybw_opts, @@ -404,8 +412,11 @@ where timeout.clone(), ); // Launch in threadpool and wait for result. - self.thread_pool - .install(|| negamaxer.iterative_search(s.clone(), self.max_depth, false)) + let value_move = self + .thread_pool + .install(|| negamaxer.iterative_search(s.clone(), self.max_depth, false)); + self.principal_variation = negamaxer.principal_variation(); + value_move }; if let Some((best_move, value)) = best_value_move { self.prev_value = value; @@ -443,6 +454,10 @@ where self.max_depth = depth; self.max_time = Duration::new(0, 0); } + + fn principal_variation(&self) -> Vec<::M> { + self.principal_variation.clone() + } } impl Drop for ParallelYbw { From 1a918ebb64995400184c86fe691e9b6b586dc9c7 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 7 Jun 2022 21:30:37 -0700 Subject: [PATCH 097/162] Detect cycles manually and find PV to arbitrary depth. 
--- src/strategies/iterative.rs | 2 +- src/strategies/lazy_smp.rs | 2 +- src/strategies/table.rs | 7 ++++--- src/strategies/ybw.rs | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 5792e4b..c48f6ab 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -619,7 +619,7 @@ where self.negamaxer.nodes_explored = 0; self.prev_value = entry.value; depth += self.opts.step_increment; - self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone, depth); + self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone); } self.wall_time = start_time.elapsed(); if self.opts.verbose { diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 29c1a03..59af2d9 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -448,7 +448,7 @@ where self.actual_depth = max(self.actual_depth, depth); self.prev_value = entry.value; depth += self.opts.step_increment; - self.table.populate_pv(&mut self.pv, &mut s_clone, depth); + self.table.populate_pv(&mut self.pv, &mut s_clone); self.shared_stats.update(&mut self.negamaxer); self.nodes_explored.push(self.shared_stats.reset_nodes_explored()); } diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 836eae3..0a652b2 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -106,12 +106,13 @@ pub(super) trait Table { // After finishing a search, populate the principal variation as deep as // the table remembers it. - fn populate_pv(&self, pv: &mut Vec, s: &mut G::S, mut depth: u8) + fn populate_pv(&self, pv: &mut Vec, s: &mut G::S) where M: Move, ::S: Zobrist, { pv.clear(); + let mut hash_history = Vec::new(); let mut hash = s.zobrist_hash(); while let Some(entry) = self.lookup(hash) { // The principal variation should only have exact nodes, as other @@ -127,10 +128,10 @@ pub(super) trait Table { m.apply(s); hash = s.zobrist_hash(); // Prevent cyclical PVs from being infinitely long. 
- if depth == 0 { + if hash_history.contains(&hash) { break; } - depth -= 1; + hash_history.push(hash); } // Restore state. for m in pv.iter().rev() { diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 35e568a..cb6880f 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -333,7 +333,7 @@ where depth += self.opts.step_increment; let mut pv_moves = Vec::new(); - self.table.populate_pv(&mut pv_moves, &mut state, depth); + self.table.populate_pv(&mut pv_moves, &mut state); self.pv.lock().unwrap().clone_from(&pv_moves); pv = pv_string::(&pv_moves[..], &mut state); } From 3aedf2193616cebd81d8e908e9e4d81186634580 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 7 Jun 2022 21:33:48 -0700 Subject: [PATCH 098/162] Release v0.3.0 Breaking API changes: * Adjust all depth arguments by 1 to make more sense. A search of depth 1 now evaluates all children. * All depths are now u8 and all thread counts are now usize. * Move noisy move generation to Evaluator, as it is not an intrinsic part of the Game rules. * Moved timeout, max_depth, and principal_variation into the Strategy trait. Other changes: * New verbosity setting gives more insight to what the iterative searches are up to. * New MTD(f) algorithm for single-threaded IterativeSearch. * Added background pondering option to ParallelYbw. * Basic null-move-pruning option. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1b9b289..d282676 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.2.1" +version = "0.3.0" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" From 7e2eb85304f6dc97304646ff2d2548a8593773db Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 7 Jun 2022 22:55:08 -0700 Subject: [PATCH 099/162] Fix clippy lints. 
--- src/strategies/iterative.rs | 2 +- src/strategies/lazy_smp.rs | 8 ++++---- src/strategies/ybw.rs | 10 +++++----- src/util.rs | 3 ++- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index c48f6ab..de11144 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -466,7 +466,7 @@ where { pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); - let negamaxer = Negamaxer::new(table, eval, opts.clone()); + let negamaxer = Negamaxer::new(table, eval, opts); IterativeSearch { max_depth: 99, max_time: Duration::from_secs(5), diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 59af2d9..91f860c 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -289,7 +289,7 @@ where for iter in 1..num_threads { let table2 = table.clone(); let eval2 = eval.clone(); - let opts2 = opts.clone(); + let opts2 = opts; let signal2 = signal.clone(); let stats2 = stats.clone(); let extra_depth = if smp_opts.differing_depths { iter as u8 & 1 } else { 0 }; @@ -303,7 +303,7 @@ where helper.process(); }); } - let negamaxer = Negamaxer::new(table.clone(), eval, opts.clone()); + let negamaxer = Negamaxer::new(table.clone(), eval, opts); LazySmp { max_depth: 99, max_time: Duration::from_secs(5), @@ -392,7 +392,7 @@ where alpha = WORST_EVAL; } let beta = self.prev_value.saturating_add(window); - self.signal.new_search(&s, depth, alpha, beta); + self.signal.new_search(s, depth, alpha, beta); if self .negamaxer @@ -419,7 +419,7 @@ where } } - self.signal.new_search(&s, depth, WORST_EVAL, BEST_EVAL); + self.signal.new_search(s, depth, WORST_EVAL, BEST_EVAL); let value = self.negamaxer.search_and_reorder(&mut s_clone, &mut moves, depth); if value.is_none() { diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index cb6880f..6283656 100644 --- a/src/strategies/ybw.rs +++ 
b/src/strategies/ybw.rs @@ -405,11 +405,11 @@ where let best_value_move = { let negamaxer = ParallelNegamaxer::new( - self.opts.clone(), - self.ybw_opts.clone(), + self.opts, + self.ybw_opts, self.eval.clone(), self.table.clone(), - timeout.clone(), + timeout, ); // Launch in threadpool and wait for result. let value_move = self @@ -426,8 +426,8 @@ where // Create a separate negamaxer to have a dedicated cancel // signal, and to allow the negamaxer to outlive this scope. let negamaxer = ParallelNegamaxer::new( - self.opts.clone(), - self.ybw_opts.clone(), + self.opts, + self.ybw_opts, self.eval.clone(), self.table.clone(), self.background_cancel.clone(), diff --git a/src/util.rs b/src/util.rs index 75a92a0..72c2c8e 100644 --- a/src/util.rs +++ b/src/util.rs @@ -117,7 +117,8 @@ where let count = perft_recurse::(&mut pool, state, depth, single_thread_cutoff); let dur = start.elapsed(); let rate = count as f64 / dur.as_secs_f64() / 1000.0; - println!("{:>5} {:>15} {:>11} {:>11.1}", depth, count, format!("{:.1?}", dur), rate); + let time = format!("{:.1?}", dur); + println!("{:>5} {:>15} {:>11} {:>11.1}", depth, count, time, rate); counts.push(count); } counts From 54893ad9fbaa865559371e36ae939ac41cd83491 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 14 Jun 2022 10:29:39 -0700 Subject: [PATCH 100/162] Template Game type to Random and MCTS. Previously it was too cumbersome to create one and call choose_move. 
--- src/strategies/mcts.rs | 40 +++++++++++++++++++++++++++------------- src/strategies/random.rs | 14 ++++++++------ 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 3d3c7df..77cd9c2 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -3,6 +3,7 @@ use super::util::{timeout_signal, AtomicBox}; use rand::seq::SliceRandom; use rand::Rng; +use std::marker::PhantomData; use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32, Ordering}; use std::sync::Arc; use std::thread::spawn; @@ -135,22 +136,36 @@ impl MCTSOptions { /// A strategy that uses random playouts to explore the game tree to decide on the best move. /// This can be used without an Evaluator, just using the rules of the game. -#[derive(Clone)] -pub struct MonteCarloTreeSearch { +pub struct MonteCarloTreeSearch { // TODO: Evaluator options: MCTSOptions, max_rollouts: u32, max_time: Duration, timeout: Arc, + game_type: PhantomData, +} + +// derive is broken with PhantomData (https://github.com/rust-lang/rust/issues/26925) +impl Clone for MonteCarloTreeSearch { + fn clone(&self) -> Self { + Self { + options: self.options.clone(), + max_rollouts: self.max_rollouts, + max_time: self.max_time, + timeout: self.timeout.clone(), + game_type: PhantomData, + } + } } -impl MonteCarloTreeSearch { +impl MonteCarloTreeSearch { pub fn new(options: MCTSOptions) -> Self { Self { options, max_rollouts: 0, max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), + game_type: PhantomData, } } @@ -162,7 +177,7 @@ impl MonteCarloTreeSearch { // Returns score for this node. +1 for win of original player to move. // TODO: policy options: random, look 1 ahead for winning moves, BYO Evaluator. - fn rollout(&self, s: &G::S) -> i32 + fn rollout(&self, s: &G::S) -> i32 where G::S: Clone, { @@ -194,9 +209,7 @@ impl MonteCarloTreeSearch { } // Explore the tree, make a new node, rollout, backpropagate. 
- fn simulate( - &self, node: &Node, state: &mut G::S, mut force_rollout: bool, - ) -> Option + fn simulate(&self, node: &Node, state: &mut G::S, mut force_rollout: bool) -> Option where G::S: Clone, { @@ -204,7 +217,7 @@ impl MonteCarloTreeSearch { return None; } if force_rollout { - return node.update_stats(self.rollout::(state)); + return node.update_stats(self.rollout(state)); } let expansion = match node.expansion.get() { @@ -213,7 +226,7 @@ impl MonteCarloTreeSearch { // This is a leaf node. if node.visits.load(Ordering::SeqCst) < self.options.rollouts_before_expanding { // Just rollout from here. - return node.update_stats(self.rollout::(state)); + return node.update_stats(self.rollout(state)); } else { // Expand this node, and force a rollout when we recurse. force_rollout = true; @@ -234,7 +247,7 @@ impl MonteCarloTreeSearch { let next = node.best_child(1.).unwrap(); let m = next.m.as_ref().unwrap(); m.apply(state); - let result = -self.simulate::(next, state, force_rollout)?; + let result = -self.simulate(next, state, force_rollout)?; m.undo(state); // Backpropagate. 
@@ -242,8 +255,9 @@ impl MonteCarloTreeSearch { } } -impl Strategy for MonteCarloTreeSearch +impl Strategy for MonteCarloTreeSearch where + G: Send + 'static, G::S: Clone + Send + 'static, G::M: Copy + Send + Sync + 'static, { @@ -271,7 +285,7 @@ where let mcts = self.clone(); spawn(move || { for _ in 0..rollouts_per_thread { - if mcts.simulate::(&node, &mut state, false).is_none() { + if mcts.simulate(&node, &mut state, false).is_none() { break; } } @@ -281,7 +295,7 @@ where let mut state = s.clone(); for _ in 0..rollouts_per_thread + extra { - if self.simulate::(&root, &mut state, false).is_none() { + if self.simulate(&root, &mut state, false).is_none() { break; } } diff --git a/src/strategies/random.rs b/src/strategies/random.rs index bdb782a..b657303 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -2,24 +2,26 @@ use super::super::interface::*; use rand::seq::SliceRandom; +use std::marker::PhantomData; -pub struct Random { +pub struct Random { rng: rand::rngs::ThreadRng, + game_type: PhantomData, } -impl Random { - pub fn new() -> Random { - Random { rng: rand::thread_rng() } +impl Random { + pub fn new() -> Self { + Self { rng: rand::thread_rng(), game_type: PhantomData } } } -impl Default for Random { +impl Default for Random { fn default() -> Self { Random::new() } } -impl Strategy for Random +impl Strategy for Random where G::M: Copy, { From ab796d550ed4df350af709acb150eff209d07df6 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 7 Oct 2022 11:00:18 -0700 Subject: [PATCH 101/162] Allow compilation under wasm32-unknown-unknown. Remove multithreading dependencies and tell rand to build with js bindings. 
--- Cargo.toml | 7 ++++- src/lib.rs | 3 ++ src/strategies/atomic_box.rs | 53 ++++++++++++++++++++++++++++++++++ src/strategies/mcts.rs | 3 +- src/strategies/mod.rs | 5 ++++ src/strategies/util.rs | 56 ++---------------------------------- src/util.rs | 36 +++++++++++++++++++++++ 7 files changed, 108 insertions(+), 55 deletions(-) create mode 100644 src/strategies/atomic_box.rs diff --git a/Cargo.toml b/Cargo.toml index d282676..17fedbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,10 +11,15 @@ keywords = ["ai", "game", "minimax", "negamax"] license = "MIT" [dependencies] -num_cpus = "1.0" rand = "0.8" + +[target.'cfg(not(all(target_arch="wasm32", target_os="unknown")))'.dependencies] +num_cpus = "1.0" rayon = "^1.5" +[target.'cfg(all(target_arch="wasm32", target_os="unknown"))'.dependencies] +getrandom = { version = "0.2", features = ["js"]} + [dev-dependencies] bencher = "0.1.5" diff --git a/src/lib.rs b/src/lib.rs index 88d5e4b..ecce62f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,9 +8,12 @@ pub use interface::{ Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist, BEST_EVAL, WORST_EVAL, }; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub use strategies::lazy_smp::{LazySmp, LazySmpOptions}; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch}; pub use strategies::negamax::Negamax; pub use strategies::random::Random; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub use strategies::ybw::{ParallelYbw, YbwOptions}; pub use util::perft; diff --git a/src/strategies/atomic_box.rs b/src/strategies/atomic_box.rs new file mode 100644 index 0000000..c02ea9d --- /dev/null +++ b/src/strategies/atomic_box.rs @@ -0,0 +1,53 @@ +use std::sync::atomic::{AtomicPtr, Ordering}; + +// An insert-only lock-free Option> +pub(super) struct AtomicBox(AtomicPtr); + +impl Default for 
AtomicBox { + fn default() -> Self { + Self(AtomicPtr::default()) + } +} + +impl AtomicBox { + // Tries to set the AtomicBox to this value if empty. + // Returns a reference to whatever is in the box. + pub(super) fn try_set(&self, value: Box) -> &T { + let ptr = Box::into_raw(value); + // Try to replace nullptr with the value. + let ret_ptr = if let Err(new_ptr) = + self.0.compare_exchange(std::ptr::null_mut(), ptr, Ordering::SeqCst, Ordering::SeqCst) + { + // If someone beat us to it, return the original drop the new one. + unsafe { drop(Box::from_raw(ptr)) }; + new_ptr + } else { + ptr + }; + unsafe { ret_ptr.as_ref().unwrap() } + } + + pub(super) fn get(&self) -> Option<&T> { + let ptr = self.0.load(Ordering::Relaxed); + unsafe { ptr.as_ref() } + } +} + +impl Drop for AtomicBox { + fn drop(&mut self) { + let ptr = *self.0.get_mut(); + if !ptr.is_null() { + unsafe { drop(Box::from_raw(ptr)) }; + } + } +} + +#[test] +fn test_atomic_box() { + let b = AtomicBox::::default(); + assert_eq!(None, b.get()); + b.try_set(Box::new(3)); + assert_eq!(Some(&3), b.get()); + b.try_set(Box::new(4)); + assert_eq!(Some(&3), b.get()); +} diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 77cd9c2..4508ffb 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,5 +1,6 @@ use super::super::interface::*; -use super::util::{timeout_signal, AtomicBox}; +use super::atomic_box::AtomicBox; +use super::util::timeout_signal; use rand::seq::SliceRandom; use rand::Rng; diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index b6b4b58..c221f28 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -1,11 +1,16 @@ //! Strategy implementations. 
pub mod iterative; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub mod lazy_smp; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub mod mcts; pub mod negamax; pub mod random; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub mod ybw; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +mod atomic_box; mod table; mod util; diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 8d9e822..6a1c1eb 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -1,6 +1,6 @@ use super::super::interface::*; -use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::thread::{sleep, spawn}; use std::time::Duration; @@ -90,6 +90,7 @@ impl ValueMove { Self { value, m } } + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub(super) fn max(&mut self, value: Evaluation, m: M) { if value > self.value { self.value = value; @@ -97,59 +98,8 @@ impl ValueMove { } } + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub(super) fn into_inner(self) -> (Evaluation, M) { (self.value, self.m) } } - -// An insert-only lock-free Option> -pub(super) struct AtomicBox(AtomicPtr); - -impl Default for AtomicBox { - fn default() -> Self { - Self(AtomicPtr::default()) - } -} - -impl AtomicBox { - // Tries to set the AtomicBox to this value if empty. - // Returns a reference to whatever is in the box. - pub(super) fn try_set(&self, value: Box) -> &T { - let ptr = Box::into_raw(value); - // Try to replace nullptr with the value. - let ret_ptr = if let Err(new_ptr) = - self.0.compare_exchange(std::ptr::null_mut(), ptr, Ordering::SeqCst, Ordering::SeqCst) - { - // If someone beat us to it, return the original drop the new one. 
- unsafe { drop(Box::from_raw(ptr)) }; - new_ptr - } else { - ptr - }; - unsafe { ret_ptr.as_ref().unwrap() } - } - - pub(super) fn get(&self) -> Option<&T> { - let ptr = self.0.load(Ordering::Relaxed); - unsafe { ptr.as_ref() } - } -} - -impl Drop for AtomicBox { - fn drop(&mut self) { - let ptr = *self.0.get_mut(); - if !ptr.is_null() { - unsafe { drop(Box::from_raw(ptr)) }; - } - } -} - -#[test] -fn test_atomic_box() { - let b = AtomicBox::::default(); - assert_eq!(None, b.get()); - b.try_set(Box::new(3)); - assert_eq!(Some(&3), b.get()); - b.try_set(Box::new(4)); - assert_eq!(Some(&3), b.get()); -} diff --git a/src/util.rs b/src/util.rs index 72c2c8e..5a932be 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,10 +1,12 @@ //! Utility functions for testing, and tests. +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] extern crate rayon; use super::interface; use super::interface::{Game, Move}; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] use rayon::prelude::*; use std::default::Default; use std::time::Instant; @@ -59,6 +61,7 @@ impl MovePool { } } +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] fn perft_recurse( pool: &mut MovePool, state: &mut G::S, depth: u8, single_thread_cutoff: u8, ) -> u64 @@ -103,6 +106,39 @@ where n } +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +fn perft_recurse( + pool: &mut MovePool, state: &mut G::S, depth: u8, single_thread_cutoff: u8, +) -> u64 +where + ::S: Clone + Sync, + ::M: Copy + Sync, +{ + if depth == 0 { + return 1; + } + if G::get_winner(state).is_some() { + // Apparently perft rules only count positions at the target depth. + return 0; + } + let mut moves = pool.alloc(); + G::generate_moves(state, &mut moves); + let n = if depth == 1 { + moves.len() as u64 + } else { + // Single-thread recurse. 
+ let mut count = 0; + for m in moves.iter() { + m.apply(state); + count += perft_recurse::(pool, state, depth - 1, single_thread_cutoff); + m.undo(state); + } + count + }; + pool.free(moves); + n +} + pub fn perft(state: &mut ::S, max_depth: u8, multi_threaded: bool) -> Vec where ::S: Clone + Sync, From 524df7bc934f983da35d4e22a63616852f0695a0 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 7 Oct 2022 19:10:52 -0700 Subject: [PATCH 102/162] Use a version of Instant that is wasm-compatible. --- Cargo.toml | 1 + src/strategies/iterative.rs | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 17fedbb..4c9640c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ keywords = ["ai", "game", "minimax", "negamax"] license = "MIT" [dependencies] +instant = { version = "0.1", features = ["wasm-bindgen"] } rand = "0.8" [target.'cfg(not(all(target_arch="wasm32", target_os="unknown")))'.dependencies] diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index de11144..cf23bce 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -9,11 +9,12 @@ use super::super::util::*; use super::table::*; use super::util::*; +use instant::Instant; use rand::prelude::SliceRandom; use std::cmp::max; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Duration; #[derive(Copy, Clone, Debug, Eq, PartialEq)] /// Strategies for when to overwrite entries in the transition table. From 6b1dc197509a13cf6d41fcca11ffb7602a87ee2c Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 7 Oct 2022 23:43:47 -0700 Subject: [PATCH 103/162] Create timer mechanism for wasm. 
--- src/strategies/iterative.rs | 66 +++++++++++++++---- src/strategies/lazy_smp.rs | 1 + src/strategies/mcts.rs | 3 +- src/strategies/mod.rs | 2 +- .../{atomic_box.rs => sync_util.rs} | 17 ++++- src/strategies/util.rs | 17 ----- src/strategies/ybw.rs | 1 + 7 files changed, 75 insertions(+), 32 deletions(-) rename src/strategies/{atomic_box.rs => sync_util.rs} (72%) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index cf23bce..3bc10fc 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -6,13 +6,17 @@ use super::super::interface::*; use super::super::util::*; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +use super::sync_util::timeout_signal; use super::table::*; use super::util::*; use instant::Instant; use rand::prelude::SliceRandom; use std::cmp::max; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] use std::sync::atomic::{AtomicBool, Ordering}; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] use std::sync::Arc; use std::time::Duration; @@ -227,7 +231,12 @@ impl IterativeOptions { } pub(super) struct Negamaxer { + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] timeout: Arc, + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + deadline: Instant, + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + timeout_counter: u32, pub(super) table: T, move_pool: MovePool<::M>, eval: E, @@ -248,7 +257,12 @@ where { pub(super) fn new(table: T, eval: E, opts: IterativeOptions) -> Self { Self { + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] timeout: Arc::new(AtomicBool::new(false)), + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + deadline: Instant::now(), + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + timeout_counter: 1000, table, eval, move_pool: MovePool::default(), @@ -259,21 +273,55 @@ where } } - pub(super) fn set_timeout(&mut self, timeout: Arc) { - self.timeout = timeout; - } - fn 
reset_stats(&mut self) { self.nodes_explored = 0; self.total_generate_move_calls = 0; self.total_generated_moves = 0; } + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + pub(super) fn set_timeout(&mut self, timeout: Arc) { + self.timeout = timeout; + } + + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + fn reset_timeout(&mut self, duration: Duration) { + self.timeout_counter = if duration == Duration::new(0, 0) { + // Too high counter that never hits the maximum. + 1000 + } else { + 0 + }; + self.deadline = Instant::now() + duration; + } + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + fn reset_timeout(&mut self, duration: Duration) { + self.set_timeout(if duration == Duration::new(0, 0) { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(duration) + }); + } + + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + fn timeout_check(&mut self) -> bool { + self.timeout_counter += 1; + if self.timeout_counter != 100 { + return false; + } + self.timeout_counter = 0; + Instant::now() >= self.deadline + } + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + fn timeout_check(&mut self) -> bool { + self.timeout.load(Ordering::Relaxed) + } + // Negamax only among noisy moves. fn noisy_negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, ) -> Option { - if self.timeout.load(Ordering::Relaxed) { + if self.timeout_check() { return None; } if let Some(winner) = E::G::get_winner(s) { @@ -310,7 +358,7 @@ where &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, mut beta: Evaluation, ) -> Option { - if self.timeout.load(Ordering::Relaxed) { + if self.timeout_check() { return None; } @@ -538,11 +586,7 @@ where self.actual_depth = 0; let start_time = Instant::now(); // Start timer if configured. 
- self.negamaxer.set_timeout(if self.max_time == Duration::new(0, 0) { - Arc::new(AtomicBool::new(false)) - } else { - timeout_signal(self.max_time) - }); + self.negamaxer.reset_timeout(self.max_time); let root_hash = s.zobrist_hash(); let mut s_clone = s.clone(); diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 91f860c..370a0b5 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -10,6 +10,7 @@ extern crate rand; use super::super::interface::*; use super::iterative::{IterativeOptions, Negamaxer}; +use super::sync_util::timeout_signal; use super::table::*; use super::util::*; diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 4508ffb..bbb264f 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,6 +1,5 @@ use super::super::interface::*; -use super::atomic_box::AtomicBox; -use super::util::timeout_signal; +use super::sync_util::*; use rand::seq::SliceRandom; use rand::Rng; diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index c221f28..175db2c 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -11,6 +11,6 @@ pub mod random; pub mod ybw; #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] -mod atomic_box; +mod sync_util; mod table; mod util; diff --git a/src/strategies/atomic_box.rs b/src/strategies/sync_util.rs similarity index 72% rename from src/strategies/atomic_box.rs rename to src/strategies/sync_util.rs index c02ea9d..da779b9 100644 --- a/src/strategies/atomic_box.rs +++ b/src/strategies/sync_util.rs @@ -1,4 +1,19 @@ -use std::sync::atomic::{AtomicPtr, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; +use std::sync::Arc; +use std::thread::{sleep, spawn}; +use std::time::Duration; + +pub(super) fn timeout_signal(dur: Duration) -> Arc { + // Theoretically we could include an async runtime to do this and use + // fewer threads, but the stdlib implementation is only a few lines... 
+ let signal = Arc::new(AtomicBool::new(false)); + let signal2 = signal.clone(); + spawn(move || { + sleep(dur); + signal2.store(true, Ordering::Relaxed); + }); + signal +} // An insert-only lock-free Option> pub(super) struct AtomicBox(AtomicPtr); diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 6a1c1eb..7ad1f05 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -1,10 +1,5 @@ use super::super::interface::*; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::thread::{sleep, spawn}; -use std::time::Duration; - // For values near winning and losing values, push them slightly closer to zero. // A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). // A loss in 5 moves (WORST+5) will be chosen over a loss in 3 moves (WORST+3). @@ -29,18 +24,6 @@ pub(super) fn unclamp_value(value: Evaluation) -> Evaluation { } } -pub(super) fn timeout_signal(dur: Duration) -> Arc { - // Theoretically we could include an async runtime to do this and use - // fewer threads, but the stdlib implementation is only a few lines... - let signal = Arc::new(AtomicBool::new(false)); - let signal2 = signal.clone(); - spawn(move || { - sleep(dur); - signal2.store(true, Ordering::Relaxed); - }); - signal -} - // Return a unique id for humans for this move. 
pub(super) fn move_id(s: &mut ::S, m: Option<::M>) -> String where diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 6283656..6ba81c0 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -9,6 +9,7 @@ extern crate rayon; use super::super::interface::*; use super::iterative::IterativeOptions; +use super::sync_util::timeout_signal; use super::table::*; use super::util::*; From 05ce848291741dd57325aa1ebeb0bcf573757bbb Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 18 Oct 2022 09:51:01 -0700 Subject: [PATCH 104/162] Tweak and refactor null move pruning --- src/strategies/iterative.rs | 40 +++++++++++++++++++++++-------------- src/strategies/ybw.rs | 40 +++++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 3bc10fc..f47cb82 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -317,6 +317,29 @@ where self.timeout.load(Ordering::Relaxed) } + fn null_move_check( + &mut self, s: &mut ::S, depth: u8, beta: Evaluation, + ) -> Option { + if let (Some(depth_reduction), Some(null_move)) = + (self.opts.null_move_depth, E::G::null_move(s)) + { + // Default to a minimum of depth=1 after null moving. + if depth > depth_reduction && + // If the position already seems pretty awesome. + self.eval.evaluate(s) >= beta + { + // If we just pass and let the opponent play this position (at reduced depth), + null_move.apply(s); + let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; + null_move.undo(s); + // is the result still so good that we shouldn't bother with a full search? + return Some(value); + } + } + // If we didn't check, return a low value that won't trigger beta cutoff. + Some(WORST_EVAL) + } + // Negamax only among noisy moves. 
fn noisy_negamax( &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, @@ -380,21 +403,8 @@ where return Some(value); } - if let (Some(depth_reduction), Some(null_move)) = - (self.opts.null_move_depth, E::G::null_move(s)) - { - if depth >= depth_reduction { - // If we just pass and let the opponent play this position (at reduced depth), - null_move.apply(s); - let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; - null_move.undo(s); - // is the result still so good that we shouldn't bother with a full search? - if value >= beta { - // This value was at a fake depth, so don't assume too - // much about the lowerbound. - return Some(beta); - } - } + if self.null_move_check(s, depth, beta)? >= beta { + return Some(beta); } let mut moves = self.move_pool.alloc(); diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 6ba81c0..2c00f5a 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -90,6 +90,29 @@ where self.pv.lock().unwrap().clone() } + fn null_move_check( + &self, s: &mut ::S, depth: u8, beta: Evaluation, + ) -> Option { + if let (Some(depth_reduction), Some(null_move)) = + (self.opts.null_move_depth, E::G::null_move(s)) + { + // Default to a minimum of depth=1 after null moving. + if depth > depth_reduction && + // If the position already seems pretty awesome. + self.eval.evaluate(s) >= beta + { + // If we just pass and let the opponent play this position (at reduced depth), + null_move.apply(s); + let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; + null_move.undo(s); + // is the result still so good that we shouldn't bother with a full search? + return Some(value); + } + } + // If we didn't check, return a low value that won't trigger beta cutoff. + Some(WORST_EVAL) + } + // Negamax only among noisy moves. 
fn noisy_negamax( &self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, @@ -158,21 +181,8 @@ where return Some(value); } - if let (Some(depth_reduction), Some(null_move)) = - (self.opts.null_move_depth, E::G::null_move(s)) - { - if depth >= depth_reduction { - // If we just pass and let the opponent play this position (at reduced depth), - null_move.apply(s); - let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; - null_move.undo(s); - // is the result still so good that we shouldn't bother with a full search? - if value >= beta { - // This value was at a fake depth, so don't assume too - // much about the lowerbound. - return Some(beta); - } - } + if self.null_move_check(s, depth, beta)? >= beta { + return Some(beta); } //let mut moves = self.move_pool.alloc(); From 1b04a970b92826855739e19e0a416d33d09552d6 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 19 Oct 2022 09:29:34 -0700 Subject: [PATCH 105/162] Add singular extension option --- src/strategies/iterative.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index f47cb82..029fab1 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -122,6 +122,7 @@ pub struct IterativeOptions { pub(super) strategy: Replacement, pub(super) null_window_search: bool, pub(super) null_move_depth: Option, + pub(super) singular_extension: bool, pub(super) aspiration_window: Option, pub(super) mtdf: bool, pub(super) step_increment: u8, @@ -137,6 +138,7 @@ impl IterativeOptions { strategy: Replacement::TwoTier, null_window_search: true, null_move_depth: None, + singular_extension: false, aspiration_window: None, mtdf: false, step_increment: 1, @@ -183,6 +185,13 @@ impl IterativeOptions { self } + /// Whether to extend a branch of the search (by 1) if there is only one + /// move (or only one reasonable move). 
+ pub fn with_singular_extension(mut self) -> Self { + self.singular_extension = true; + self + } + /// Whether to search first in a narrow window around the previous root /// value on each iteration. pub fn with_aspiration_window(mut self, window: Evaluation) -> Self { @@ -378,7 +387,7 @@ where // Recursively compute negamax on the game state. Returns None if it hits the timeout. pub(super) fn negamax( - &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, + &mut self, s: &mut ::S, mut depth: u8, mut alpha: Evaluation, mut beta: Evaluation, ) -> Option { if self.timeout_check() { @@ -416,6 +425,11 @@ where return Some(WORST_EVAL); } + // TODO: Also do a pre-search to look for moves much better than others. + if self.opts.singular_extension && moves.len() == 1 { + depth += 1; + } + // Reorder moves. if depth >= self.opts.min_reorder_moves_depth { self.eval.reorder_moves(s, &mut moves); From 52b3210343addebd7a360b798cd7b6cfd155c167 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 19 Oct 2022 16:16:19 -0700 Subject: [PATCH 106/162] Add countermove and history table to IterativeSearch. The countermove table seems to produce some better results at near zero cost. The history table gives an even bigger boost but at a pretty high cost. Need some thread-local storage to add these to YBW. --- src/interface.rs | 8 +++++ src/strategies/iterative.rs | 50 ++++++++++++++++++++---------- src/strategies/lazy_smp.rs | 3 +- src/strategies/table.rs | 61 ++++++++++++++++++++++++++++++++++++- src/strategies/util.rs | 9 ++++++ tests/strategies.rs | 5 ++- 6 files changed, 117 insertions(+), 19 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index 047210d..45ac397 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -80,6 +80,14 @@ pub trait Move { fn notation(&self, _state: &::S) -> Option { None } + /// Return a small index for this move for position-independent tables. + fn table_index(&self) -> u16 { + 0 + } + /// Maximum index value. 
+ fn max_table_index() -> u16 { + 0 + } } /// The result of playing a game until it finishes. diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 029fab1..681e006 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -128,6 +128,8 @@ pub struct IterativeOptions { pub(super) step_increment: u8, pub(super) max_quiescence_depth: u8, pub(super) min_reorder_moves_depth: u8, + pub(super) countermove_table: bool, + pub(super) countermove_history_table: bool, pub(super) verbose: bool, } @@ -144,6 +146,8 @@ impl IterativeOptions { step_increment: 1, max_quiescence_depth: 0, min_reorder_moves_depth: u8::MAX, + countermove_table: false, + countermove_history_table: false, verbose: false, } } @@ -232,6 +236,21 @@ impl IterativeOptions { self } + /// Enable the countermove table, which reorders to the front moves that + /// have worked to counter the previous move in other branches. + pub fn with_countermoves(mut self) -> Self { + self.countermove_table = true; + self + } + + /// Enable the countermove history table. It keeps a counter for moves + /// that have caused beta cutoffs in other branches, and reorders moves + /// based on this counter. + pub fn with_countermove_history(mut self) -> Self { + self.countermove_history_table = true; + self + } + /// Enable verbose print statements of the ongoing performance of the search. 
pub fn verbose(mut self) -> Self { self.verbose = true; @@ -247,6 +266,7 @@ pub(super) struct Negamaxer { #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] timeout_counter: u32, pub(super) table: T, + pub(super) countermoves: CounterMoves<::M>, move_pool: MovePool<::M>, eval: E, @@ -273,6 +293,7 @@ where #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] timeout_counter: 1000, table, + countermoves: CounterMoves::new(opts.countermove_table, opts.countermove_history_table), eval, move_pool: MovePool::default(), opts, @@ -339,7 +360,7 @@ where { // If we just pass and let the opponent play this position (at reduced depth), null_move.apply(s); - let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; + let value = -self.negamax(s, None, depth - depth_reduction, -beta, -beta + 1)?; null_move.undo(s); // is the result still so good that we shouldn't bother with a full search? return Some(value); @@ -387,8 +408,8 @@ where // Recursively compute negamax on the game state. Returns None if it hits the timeout. pub(super) fn negamax( - &mut self, s: &mut ::S, mut depth: u8, mut alpha: Evaluation, - mut beta: Evaluation, + &mut self, s: &mut ::S, prev_move: Option<::M>, mut depth: u8, + mut alpha: Evaluation, mut beta: Evaluation, ) -> Option { if self.timeout_check() { return None; @@ -434,14 +455,9 @@ where if depth >= self.opts.min_reorder_moves_depth { self.eval.reorder_moves(s, &mut moves); } + self.countermoves.reorder(prev_move, &mut moves); if let Some(good) = good_move { - // Move predicted good move to the front. 
- for i in 0..moves.len() { - if moves[i] == good { - moves[0..i + 1].rotate_right(1); - break; - } - } + move_to_front(good, &mut moves); } let mut best = WORST_EVAL; @@ -450,15 +466,15 @@ where for &m in moves.iter() { m.apply(s); let value = if null_window { - let probe = -self.negamax(s, depth - 1, -alpha - 1, -alpha)?; + let probe = -self.negamax(s, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { // Full search fallback. - -self.negamax(s, depth - 1, -beta, -probe)? + -self.negamax(s, Some(m), depth - 1, -beta, -probe)? } else { probe } } else { - -self.negamax(s, depth - 1, -beta, -alpha)? + -self.negamax(s, Some(m), depth - 1, -beta, -alpha)? }; m.undo(s); if value > best { @@ -472,6 +488,7 @@ where null_window = self.opts.null_window_search; } if alpha >= beta { + self.countermoves.update(prev_move, m); break; } } @@ -492,7 +509,7 @@ where } let alpha = max(target.saturating_sub(window), WORST_EVAL); let beta = target.saturating_add(window); - self.negamax(s, depth, alpha, beta)?; + self.negamax(s, None, depth, alpha, beta)?; Some(()) } @@ -503,7 +520,7 @@ where let beta = BEST_EVAL; for value_move in moves.iter_mut() { value_move.m.apply(s); - let value = -self.negamax(s, depth - 1, -beta, -alpha)?; + let value = -self.negamax(s, Some(value_move.m), depth - 1, -beta, -alpha)?; value_move.m.undo(s); alpha = max(alpha, value); @@ -586,7 +603,7 @@ where depth, beta, lowerbound, upperbound ); } - guess = self.negamaxer.negamax(s, depth, beta - 1, beta)?; + guess = self.negamaxer.negamax(s, None, depth, beta - 1, beta)?; if guess < beta { upperbound = guess; } else { @@ -604,6 +621,7 @@ where { fn choose_move(&mut self, s: &::S) -> Option<::M> { self.negamaxer.table.advance_generation(); + self.negamaxer.countermoves.advance_generation(E::G::null_move(s)); // Reset stats. 
self.nodes_explored.clear(); self.negamaxer.reset_stats(); diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 370a0b5..4dbdad8 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -175,6 +175,7 @@ where &mut beta, ); + self.negamaxer.countermoves.advance_generation(E::G::null_move(&search.state)); // Randomize the first level of moves. let mut moves = Vec::new(); E::G::generate_moves(&search.state, &mut moves); @@ -183,7 +184,7 @@ where for m in moves { m.apply(&mut search.state); if let Some(value) = - self.negamaxer.negamax(&mut search.state, depth - 1, alpha, beta) + self.negamaxer.negamax(&mut search.state, Some(m), depth - 1, alpha, beta) { alpha = max(alpha, -value); } else { diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 0a652b2..70474cf 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -1,4 +1,4 @@ -use super::util::unclamp_value; +use super::util::{move_to_front, unclamp_value}; use crate::interface::*; use std::cmp::{max, min}; use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; @@ -376,3 +376,62 @@ impl ConcurrentTable for LockfreeTable { self.generation.fetch_add(1, Ordering::SeqCst); } } + +// A single-threaded utility to find moves that have done well in other branches. +pub(super) struct CounterMoves { + countermove_enabled: bool, + history_enabled: bool, + // For a given move index, which followup most recently led to a beta cutoff? + countermove_table: Vec, + // For each move index, how many beta cutoffs has it produced? + history_table: Vec, +} + +impl CounterMoves { + pub(super) fn new(countermove_enabled: bool, history_enabled: bool) -> Self { + Self { + countermove_enabled, + history_enabled, + countermove_table: Vec::new(), + history_table: Vec::new(), + } + } + + pub(super) fn reorder(&self, prev: Option, moves: &mut [M]) { + if !self.history_table.is_empty() { + // Stable sort to preserve previous orderings. 
+ moves.sort_by_key(|m| !self.history_table[m.table_index() as usize]); + } + if let Some(prev) = prev { + if let Some(response) = self.countermove_table.get(prev.table_index() as usize) { + move_to_front(*response, moves); + } + } + } + + pub(super) fn update(&mut self, prev: Option, m: M) { + if let Some(prev) = prev { + if let Some(entry) = self.countermove_table.get_mut(prev.table_index() as usize) { + *entry = m; + } + } + if let Some(entry) = self.history_table.get_mut(m.table_index() as usize) { + *entry = 1u32.saturating_add(*entry); + } + } + + pub(super) fn advance_generation(&mut self, null_move: Option) { + // Lazily allocate tables + if self.countermove_enabled && self.countermove_table.is_empty() { + if let Some(m) = null_move { + self.countermove_table = vec![m; M::max_table_index() as usize + 1]; + } + } + if self.history_enabled && self.history_table.is_empty() { + self.history_table = vec![0; M::max_table_index() as usize + 1]; + } + + // Partially degrade old values, to bias towards new data. + self.history_table.iter_mut().for_each(|n| *n >>= 1); + } +} diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 7ad1f05..0a6a1ed 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -62,6 +62,15 @@ where out } +pub(super) fn move_to_front(m: M, moves: &mut [M]) { + for i in 0..moves.len() { + if moves[i] == m { + moves[0..i + 1].rotate_right(1); + break; + } + } +} + // This exists to be wrapped in a mutex, because it didn't work when I tried a tuple. pub(super) struct ValueMove { pub(super) value: Evaluation, diff --git a/tests/strategies.rs b/tests/strategies.rs index d847a29..aacd01d 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -212,7 +212,10 @@ fn compare_plain_negamax() { // more parallelism in the parallel strategies. 
#[test] fn compare_deep_negamax() { - let opt = IterativeOptions::new().with_table_byte_size(64000); + let opt = IterativeOptions::new() + .with_table_byte_size(64000) + .with_countermoves() + .with_countermove_history(); for iter in 0..10 { for max_depth in 1..10 { let b = generate_random_state(10); From af39d4186f3e8ec39ad0dbc6b3a0cfa79f3ecc80 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 26 Oct 2022 16:35:19 -0700 Subject: [PATCH 107/162] Make some config fields public. --- src/strategies/iterative.rs | 6 +++--- src/strategies/ybw.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 681e006..0aed245 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -118,7 +118,7 @@ impl Table for TranspositionTable { /// Options to use for the iterative search engines. #[derive(Clone, Copy)] pub struct IterativeOptions { - pub(super) table_byte_size: usize, + pub table_byte_size: usize, pub(super) strategy: Replacement, pub(super) null_window_search: bool, pub(super) null_move_depth: Option, @@ -130,13 +130,13 @@ pub struct IterativeOptions { pub(super) min_reorder_moves_depth: u8, pub(super) countermove_table: bool, pub(super) countermove_history_table: bool, - pub(super) verbose: bool, + pub verbose: bool, } impl IterativeOptions { pub fn new() -> Self { IterativeOptions { - table_byte_size: 1_000_000, + table_byte_size: 1 << 20, strategy: Replacement::TwoTier, null_window_search: true, null_move_depth: None, diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 2c00f5a..ff77a7d 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -22,9 +22,9 @@ use std::time::{Duration, Instant}; /// Options to use for the parallel search engine. 
#[derive(Clone, Copy)] pub struct YbwOptions { - num_threads: Option, + pub num_threads: Option, serial_cutoff_depth: u8, - background_pondering: bool, + pub background_pondering: bool, } impl YbwOptions { @@ -58,7 +58,7 @@ impl YbwOptions { self } - fn num_threads(self) -> usize { + pub fn num_threads(self) -> usize { self.num_threads.unwrap_or_else(num_cpus::get) } } From 3b0e76cde05686de031fc095a0a42caa9b450008 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 31 Oct 2022 10:26:02 -0700 Subject: [PATCH 108/162] Exit early when guaranteed to win or lose. --- src/strategies/iterative.rs | 11 +++++------ src/strategies/ybw.rs | 9 ++++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 0aed245..0884518 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -634,7 +634,6 @@ where let mut s_clone = s.clone(); let mut best_move = None; let mut interval_start; - let mut maxxed = false; // Store the moves so they can be reordered every iteration. let mut moves = Vec::new(); E::G::generate_moves(&s_clone, &mut moves); @@ -662,7 +661,7 @@ where // Timeout. 
break; } - if self.opts.verbose && !maxxed { + if self.opts.verbose { if let Some(entry) = self.negamaxer.table.lookup(root_hash) { let end = Instant::now(); let interval = end - interval_start; @@ -687,7 +686,7 @@ where let entry = self.negamaxer.table.lookup(root_hash).unwrap(); best_move = entry.best_move; - if self.opts.verbose && !maxxed { + if self.opts.verbose { let interval = Instant::now() - interval_start; eprintln!( "Iterative fullsearch depth{:>2} took{:>5}ms; value{:>6} bestmove={}", @@ -696,9 +695,6 @@ where entry.value_string(), move_id::(&mut s_clone, best_move) ); - if unclamp_value(entry.value).abs() == BEST_EVAL { - maxxed = true; - } } self.actual_depth = max(self.actual_depth, depth); @@ -707,6 +703,9 @@ where self.prev_value = entry.value; depth += self.opts.step_increment; self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone); + if unclamp_value(entry.value).abs() == BEST_EVAL { + break; + } } self.wall_time = start_time.elapsed(); if self.opts.verbose { diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index ff77a7d..1154c83 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -299,7 +299,6 @@ where let mut best_move = None; let mut best_value = 0; let mut interval_start; - let mut maxxed = false; let mut pv = String::new(); let mut depth = max_depth % self.opts.step_increment; @@ -327,7 +326,7 @@ where best_move = entry.best_move; best_value = entry.value; - if self.opts.verbose && !background && !maxxed { + if self.opts.verbose && !background { let interval = Instant::now() - interval_start; eprintln!( "Ybw search (threads={}) depth{:>2} took{:>5}ms; returned{:>5}; bestmove {}", @@ -337,9 +336,6 @@ where entry.value_string(), move_id::(&mut state, best_move) ); - if unclamp_value(entry.value).abs() == BEST_EVAL { - maxxed = true; - } } depth += self.opts.step_increment; @@ -347,6 +343,9 @@ where self.table.populate_pv(&mut pv_moves, &mut state); self.pv.lock().unwrap().clone_from(&pv_moves); pv = 
pv_string::(&pv_moves[..], &mut state); + if unclamp_value(entry.value).abs() == BEST_EVAL { + break; + } } if self.opts.verbose && !background { eprintln!("Principal variation: {}", pv); From f3c777b08cd6b98f07853fe3b06ad61a92c80435 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 31 Oct 2022 16:21:43 -0700 Subject: [PATCH 109/162] New example for using minimax crate on third party chess library. --- examples/chess/Cargo.toml | 8 ++++ examples/chess/src/main.rs | 96 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 examples/chess/Cargo.toml create mode 100644 examples/chess/src/main.rs diff --git a/examples/chess/Cargo.toml b/examples/chess/Cargo.toml new file mode 100644 index 0000000..f2a27f9 --- /dev/null +++ b/examples/chess/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "chess-minimax-example" +version = "0.1.0" +edition = "2021" + +[dependencies] +chess = "3.2" +minimax = { path = "../.." } \ No newline at end of file diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs new file mode 100644 index 0000000..7f6ba97 --- /dev/null +++ b/examples/chess/src/main.rs @@ -0,0 +1,96 @@ +extern crate chess; +extern crate minimax; + +use minimax::{Game, Move, Strategy}; + +struct Chess; + +// Using newtypes to get external chess impl to implement minimax traits. 
+#[derive(Clone)] +struct Board { + history: Vec, +} +#[derive(Copy, Clone, Eq, PartialEq)] +struct ChessMove(chess::ChessMove); + +impl Board { + fn new() -> Self { + Self { history: vec![chess::Board::default()] } + } + fn board(&self) -> &chess::Board { + self.history.last().unwrap() + } +} + +impl minimax::Game for Chess { + type S = Board; + type M = ChessMove; + + fn generate_moves(b: &Board, moves: &mut Vec) { + for m in chess::MoveGen::new_legal(b.board()) { + moves.push(ChessMove(m)); + } + } + + fn get_winner(b: &Board) -> Option { + match b.board().status() { + chess::BoardStatus::Ongoing => None, + chess::BoardStatus::Stalemate => Some(minimax::Winner::Draw), + chess::BoardStatus::Checkmate => Some(minimax::Winner::PlayerJustMoved), + } + } +} + +impl minimax::Move for ChessMove { + type G = Chess; + fn apply(&self, b: &mut Board) { + b.history.push(b.board().make_move_new(self.0)); + } + + fn undo(&self, b: &mut Board) { + b.history.pop(); + } +} + +#[derive(Default)] +struct Evaluator; + +impl minimax::Evaluator for Evaluator { + type G = Chess; + fn evaluate(&self, b: &Board) -> minimax::Evaluation { + let board = b.board(); + let mut score = 0; + for sq in 0..64 { + let sq = unsafe { chess::Square::new(sq) }; + if let Some(piece) = board.piece_on(sq) { + let value = match piece { + chess::Piece::Pawn => 1, + chess::Piece::Knight => 3, + chess::Piece::Bishop => 3, + chess::Piece::Rook => 5, + chess::Piece::Queen => 9, + chess::Piece::King => 0, + }; + if board.color_on(sq).unwrap() == board.side_to_move() { + score += value; + } else { + score -= value; + } + } + } + score + } +} + +fn main() { + let mut b = Board::new(); + let mut strategy = minimax::Negamax::new(Evaluator::default(), 5); + while Chess::get_winner(&b).is_none() { + println!("{}", b.board()); + match strategy.choose_move(&b) { + Some(m) => m.apply(&mut b), + None => break, + } + } + println!("Checkmate {:?}", b.board().side_to_move()); +} From 7c701e6d5b1022d0a3f5f0c8379d70cd58b77ed7 
Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 31 Oct 2022 17:00:54 -0700 Subject: [PATCH 110/162] chess: Add zobrist and notation to use IterativeSearch. --- examples/chess/src/main.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs index 7f6ba97..de5a2cd 100644 --- a/examples/chess/src/main.rs +++ b/examples/chess/src/main.rs @@ -22,6 +22,12 @@ impl Board { } } +impl minimax::Zobrist for Board { + fn zobrist_hash(&self) -> u64 { + self.board().get_hash() + } +} + impl minimax::Game for Chess { type S = Board; type M = ChessMove; @@ -50,6 +56,10 @@ impl minimax::Move for ChessMove { fn undo(&self, b: &mut Board) { b.history.pop(); } + + fn notation(&self, _b: &Board) -> Option { + Some(format!("{}", self.0)) + } } #[derive(Default)] @@ -84,7 +94,9 @@ impl minimax::Evaluator for Evaluator { fn main() { let mut b = Board::new(); - let mut strategy = minimax::Negamax::new(Evaluator::default(), 5); + let opts = minimax::IterativeOptions::new().verbose(); + let mut strategy = minimax::IterativeSearch::new(Evaluator::default(), opts); + strategy.set_timeout(std::time::Duration::from_secs(1)); while Chess::get_winner(&b).is_none() { println!("{}", b.board()); match strategy.choose_move(&b) { From c77a8f200ef7108aab514eb18d7d861d43a05c63 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 31 Oct 2022 23:04:16 -0700 Subject: [PATCH 111/162] Add thread-local MovePool to YBW. 
--- src/strategies/sync_util.rs | 28 ++++++++++++++++++++++++++++ src/strategies/ybw.rs | 31 +++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs index da779b9..25b1de8 100644 --- a/src/strategies/sync_util.rs +++ b/src/strategies/sync_util.rs @@ -66,3 +66,31 @@ fn test_atomic_box() { b.try_set(Box::new(4)); assert_eq!(Some(&3), b.get()); } + +pub(super) struct ThreadLocal { + // Our owned reference to all the locals. + _locals: Vec, + // Mutable reference from which each thread finds its local. + ptr: *mut T, +} + +// Values are only accessed from their individual threads and references do not leak. +unsafe impl Send for ThreadLocal {} +unsafe impl Sync for ThreadLocal {} + +impl ThreadLocal { + pub(super) fn new(pool: &rayon::ThreadPool) -> Self { + let mut locals = Vec::new(); + for _ in 0..pool.current_num_threads() { + locals.push(T::default()); + } + let ptr = locals.as_mut_ptr(); + Self { _locals: locals, ptr } + } + + pub(super) fn local_do(&self, f: F) { + if let Some(index) = rayon::current_thread_index() { + f(unsafe { self.ptr.add(index).as_mut().unwrap() }); + } + } +} diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 1154c83..8db1875 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -8,8 +8,9 @@ extern crate rayon; use super::super::interface::*; +use super::super::util::*; use super::iterative::IterativeOptions; -use super::sync_util::timeout_signal; +use super::sync_util::{timeout_signal, ThreadLocal}; use super::table::*; use super::util::*; @@ -70,6 +71,7 @@ struct ParallelNegamaxer { ybw_opts: YbwOptions, timeout: Arc, // TODO: stats + move_pool: ThreadLocal::M>>, pv: Mutex::M>>, } @@ -82,8 +84,17 @@ where fn new( opts: IterativeOptions, ybw_opts: YbwOptions, eval: E, table: Arc::M>>, timeout: Arc, + thread_pool: &rayon::ThreadPool, ) -> Self { - Self { table, eval, opts, ybw_opts, timeout, pv: Mutex::new(Vec::new()) 
} + Self { + table, + eval, + opts, + ybw_opts, + timeout, + move_pool: ThreadLocal::new(thread_pool), + pv: Mutex::new(Vec::new()), + } } fn principal_variation(&self) -> Vec<::M> { @@ -127,11 +138,11 @@ where return Some(self.eval.evaluate(s)); } - //let mut moves = self.move_pool.alloc(); let mut moves = Vec::new(); + self.move_pool.local_do(|pool| moves = pool.alloc()); self.eval.generate_noisy_moves(s, &mut moves); if moves.is_empty() { - //self.move_pool.free(moves); + self.move_pool.local_do(|pool| pool.free(moves)); return Some(self.eval.evaluate(s)); } @@ -146,7 +157,7 @@ where break; } } - //self.move_pool.free(moves); + self.move_pool.local_do(|pool| pool.free(moves)); Some(best) } @@ -185,13 +196,13 @@ where return Some(beta); } - //let mut moves = self.move_pool.alloc(); let mut moves = Vec::new(); + self.move_pool.local_do(|pool| moves = pool.alloc()); E::G::generate_moves(s, &mut moves); //self.total_generate_move_calls += 1; //self.total_generated_moves += moves.len() as u64; if moves.is_empty() { - //self.move_pool.free(moves); + self.move_pool.local_do(|pool| pool.free(moves)); return Some(WORST_EVAL); } let first_move = good_move.unwrap_or(moves[0]); @@ -287,7 +298,7 @@ where }; self.table.concurrent_update(hash, alpha_orig, beta, depth, best, best_move); - //self.move_pool.free(moves); + self.move_pool.local_do(|pool| pool.free(moves)); Some(clamp_value(best)) } @@ -360,7 +371,6 @@ pub struct ParallelYbw { background_cancel: Arc, table: Arc::M>>, - //move_pool: MovePool<::M>, prev_value: Evaluation, principal_variation: Vec<::M>, eval: E, @@ -381,7 +391,6 @@ impl ParallelYbw { max_time: Duration::from_secs(5), background_cancel: Arc::new(AtomicBool::new(false)), table, - //move_pool: MovePool::<_>::default(), prev_value: 0, principal_variation: Vec::new(), thread_pool: pool_builder.build().unwrap(), @@ -420,6 +429,7 @@ where self.eval.clone(), self.table.clone(), timeout, + &self.thread_pool, ); // Launch in threadpool and wait for result. 
let value_move = self @@ -441,6 +451,7 @@ where self.eval.clone(), self.table.clone(), self.background_cancel.clone(), + &self.thread_pool, ); let mut state = s.clone(); best_move.apply(&mut state); From 8473278d65a417bc14ce902f853a9a4ddb517a11 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 2 Nov 2022 09:35:37 -0700 Subject: [PATCH 112/162] Add thread-local countermoves table to ybw. It currently makes a new table every move instead of diluting previous moves. --- src/strategies/sync_util.rs | 6 ++-- src/strategies/ybw.rs | 55 +++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs index 25b1de8..4b8434b 100644 --- a/src/strategies/sync_util.rs +++ b/src/strategies/sync_util.rs @@ -78,11 +78,11 @@ pub(super) struct ThreadLocal { unsafe impl Send for ThreadLocal {} unsafe impl Sync for ThreadLocal {} -impl ThreadLocal { - pub(super) fn new(pool: &rayon::ThreadPool) -> Self { +impl ThreadLocal { + pub(super) fn new T>(f: F, pool: &rayon::ThreadPool) -> Self { let mut locals = Vec::new(); for _ in 0..pool.current_num_threads() { - locals.push(T::default()); + locals.push(f()); } let ptr = locals.as_mut_ptr(); Self { _locals: locals, ptr } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 8db1875..9c64a4b 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -72,6 +72,7 @@ struct ParallelNegamaxer { timeout: Arc, // TODO: stats move_pool: ThreadLocal::M>>, + countermoves: ThreadLocal::M>>, pv: Mutex::M>>, } @@ -92,7 +93,11 @@ where opts, ybw_opts, timeout, - move_pool: ThreadLocal::new(thread_pool), + move_pool: ThreadLocal::new(MovePool::default, thread_pool), + countermoves: ThreadLocal::new( + || CounterMoves::new(opts.countermove_table, opts.countermove_history_table), + thread_pool, + ), pv: Mutex::new(Vec::new()), } } @@ -114,7 +119,7 @@ where { // If we just pass and let the opponent play this position (at reduced 
depth), null_move.apply(s); - let value = -self.negamax(s, depth - depth_reduction, -beta, -beta + 1)?; + let value = -self.negamax(s, None, depth - depth_reduction, -beta, -beta + 1)?; null_move.undo(s); // is the result still so good that we shouldn't bother with a full search? return Some(value); @@ -163,7 +168,8 @@ where // Recursively compute negamax on the game state. Returns None if it hits the timeout. fn negamax( - &self, s: &mut ::S, depth: u8, mut alpha: Evaluation, mut beta: Evaluation, + &self, s: &mut ::S, prev_move: Option<::M>, depth: u8, + mut alpha: Evaluation, mut beta: Evaluation, ) -> Option where ::S: Clone + Zobrist + Send + Sync, @@ -205,11 +211,21 @@ where self.move_pool.local_do(|pool| pool.free(moves)); return Some(WORST_EVAL); } - let first_move = good_move.unwrap_or(moves[0]); + + // Reorder moves. + if depth >= self.opts.min_reorder_moves_depth { + self.eval.reorder_moves(s, &mut moves); + } + self.countermoves.local_do(|cm| cm.reorder(prev_move, &mut moves)); + if let Some(good) = good_move { + move_to_front(good, &mut moves); + } + + let first_move = moves[0]; // Evaluate first move serially. first_move.apply(s); - let initial_value = -self.negamax(s, depth - 1, -beta, -alpha)?; + let initial_value = -self.negamax(s, Some(first_move), depth - 1, -beta, -alpha)?; first_move.undo(s); alpha = max(alpha, initial_value); let (best, best_move) = if alpha >= beta { @@ -220,21 +236,18 @@ where let mut best = initial_value; let mut best_move = first_move; let mut null_window = false; - for &m in moves.iter() { - if m == first_move { - continue; - } + for &m in moves[1..].iter() { m.apply(s); let value = if null_window { - let probe = -self.negamax(s, depth - 1, -alpha - 1, -alpha)?; + let probe = -self.negamax(s, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { // Full search fallback. - -self.negamax(s, depth - 1, -beta, -probe)? + -self.negamax(s, Some(m), depth - 1, -beta, -probe)? 
} else { probe } } else { - -self.negamax(s, depth - 1, -beta, -alpha)? + -self.negamax(s, Some(m), depth - 1, -beta, -alpha)? }; m.undo(s); if value > best { @@ -248,6 +261,7 @@ where null_window = self.opts.null_window_search; } if alpha >= beta { + self.countermoves.local_do(|cm| cm.update(prev_move, m)); break; } } @@ -256,7 +270,7 @@ where let alpha = AtomicI32::new(alpha); let best_move = Mutex::new(ValueMove::new(initial_value, first_move)); // Parallel search - let result = moves.par_iter().with_max_len(1).try_for_each(|&m| -> Option<()> { + let result = moves[1..].par_iter().with_max_len(1).try_for_each(|&m| -> Option<()> { // Check to see if we're cancelled by another branch. let initial_alpha = alpha.load(Ordering::SeqCst); if initial_alpha >= beta { @@ -267,20 +281,25 @@ where m.apply(&mut state); let value = if self.opts.null_window_search && initial_alpha > alpha_orig { // TODO: send reference to alpha as neg_beta to children. - let probe = - -self.negamax(&mut state, depth - 1, -initial_alpha - 1, -initial_alpha)?; + let probe = -self.negamax( + &mut state, + Some(m), + depth - 1, + -initial_alpha - 1, + -initial_alpha, + )?; if probe > initial_alpha && probe < beta { // Check again that we're not cancelled. if alpha.load(Ordering::SeqCst) >= beta { return None; } // Full search fallback. - -self.negamax(&mut state, depth - 1, -beta, -probe)? + -self.negamax(&mut state, Some(m), depth - 1, -beta, -probe)? } else { probe } } else { - -self.negamax(&mut state, depth - 1, -beta, -initial_alpha)? + -self.negamax(&mut state, Some(m), depth - 1, -beta, -initial_alpha)? }; alpha.fetch_max(value, Ordering::SeqCst); @@ -318,7 +337,7 @@ where } while depth <= max_depth as u8 { interval_start = Instant::now(); - if self.negamax(&mut state, depth, WORST_EVAL, BEST_EVAL).is_none() { + if self.negamax(&mut state, None, depth, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. 
break; } From df37325c83d4bd372acae8fd27858f5853810ee4 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 2 Nov 2022 09:51:05 -0700 Subject: [PATCH 113/162] Pull out Stats struct for future reuse. --- src/strategies/iterative.rs | 59 +++++++++++++++++++++---------------- src/strategies/lazy_smp.rs | 15 +++++----- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 0884518..7b04865 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -258,6 +258,29 @@ impl IterativeOptions { } } +#[derive(Default)] +pub(crate) struct Stats { + pub(crate) nodes_explored: u64, + pub(crate) total_generate_move_calls: u64, + pub(crate) total_generated_moves: u64, +} + +impl Stats { + pub(crate) fn reset(&mut self) { + self.nodes_explored = 0; + self.total_generate_move_calls = 0; + self.total_generated_moves = 0; + } + pub(crate) fn explore_node(&mut self) { + self.nodes_explored += 1; + } + + pub(crate) fn generate_moves(&mut self, num_moves: usize) { + self.total_generate_move_calls += 1; + self.total_generated_moves += num_moves as u64; + } +} + pub(super) struct Negamaxer { #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] timeout: Arc, @@ -270,13 +293,8 @@ pub(super) struct Negamaxer { move_pool: MovePool<::M>, eval: E, - // Config opts: IterativeOptions, - - // Stats - pub(crate) nodes_explored: u64, - pub(crate) total_generate_move_calls: u64, - pub(crate) total_generated_moves: u64, + pub(crate) stats: Stats, } impl::M>> Negamaxer @@ -297,18 +315,10 @@ where eval, move_pool: MovePool::default(), opts, - nodes_explored: 0, - total_generate_move_calls: 0, - total_generated_moves: 0, + stats: Stats::default(), } } - fn reset_stats(&mut self) { - self.nodes_explored = 0; - self.total_generate_move_calls = 0; - self.total_generated_moves = 0; - } - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub(super) fn set_timeout(&mut self, timeout: Arc) 
{ self.timeout = timeout; @@ -415,7 +425,7 @@ where return None; } - self.nodes_explored += 1; + self.stats.explore_node(); if depth == 0 { // Evaluate quiescence search on leaf nodes. @@ -439,8 +449,7 @@ where let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); - self.total_generate_move_calls += 1; - self.total_generated_moves += moves.len() as u64; + self.stats.generate_moves(moves.len()); if moves.is_empty() { self.move_pool.free(moves); return Some(WORST_EVAL); @@ -573,16 +582,16 @@ where /// Return a human-readable summary of the last move generation. pub fn stats(&self, s: &mut ::S) -> String { let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); - let mean_branching_factor = self.negamaxer.total_generated_moves as f64 - / self.negamaxer.total_generate_move_calls as f64; + let mean_branching_factor = self.negamaxer.stats.total_generated_moves as f64 + / self.negamaxer.stats.total_generate_move_calls as f64; let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) .powf((self.actual_depth as f64 + 1.0).recip()); - let throughput = (total_nodes_explored + self.negamaxer.nodes_explored) as f64 + let throughput = (total_nodes_explored + self.negamaxer.stats.nodes_explored) as f64 / self.wall_time.as_secs_f64(); format!("Principal variation: {}\nExplored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", pv_string::(&self.pv[..], s), total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, - self.negamaxer.nodes_explored, throughput as usize) + self.negamaxer.stats.nodes_explored, throughput as usize) } #[doc(hidden)] @@ -624,7 +633,7 @@ where self.negamaxer.countermoves.advance_generation(E::G::null_move(s)); // Reset stats. self.nodes_explored.clear(); - self.negamaxer.reset_stats(); + self.negamaxer.stats.reset(); self.actual_depth = 0; let start_time = Instant::now(); // Start timer if configured. 
@@ -698,8 +707,8 @@ where } self.actual_depth = max(self.actual_depth, depth); - self.nodes_explored.push(self.negamaxer.nodes_explored); - self.negamaxer.nodes_explored = 0; + self.nodes_explored.push(self.negamaxer.stats.nodes_explored); + self.negamaxer.stats.nodes_explored = 0; self.prev_value = entry.value; depth += self.opts.step_increment; self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone); diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs index 4dbdad8..efac654 100644 --- a/src/strategies/lazy_smp.rs +++ b/src/strategies/lazy_smp.rs @@ -99,12 +99,11 @@ impl SharedStats { } fn update(&self, negamaxer: &mut Negamaxer) { - self.nodes_explored.fetch_add(negamaxer.nodes_explored, Ordering::SeqCst); - negamaxer.nodes_explored = 0; - self.generated_moves.fetch_add(negamaxer.total_generated_moves, Ordering::SeqCst); - negamaxer.total_generated_moves = 0; - self.generate_move_calls.fetch_add(negamaxer.total_generate_move_calls, Ordering::SeqCst); - negamaxer.total_generate_move_calls = 0; + self.nodes_explored.fetch_add(negamaxer.stats.nodes_explored, Ordering::SeqCst); + self.generated_moves.fetch_add(negamaxer.stats.total_generated_moves, Ordering::SeqCst); + self.generate_move_calls + .fetch_add(negamaxer.stats.total_generate_move_calls, Ordering::SeqCst); + negamaxer.stats.reset(); } fn reset_nodes_explored(&self) -> u64 { @@ -341,12 +340,12 @@ where / self.shared_stats.generate_move_calls.load(Ordering::SeqCst) as f64; let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) .powf((self.actual_depth as f64 + 1.0).recip()); - let throughput = (total_nodes_explored + self.negamaxer.nodes_explored) as f64 + let throughput = (total_nodes_explored + self.negamaxer.stats.nodes_explored) as f64 / self.wall_time.as_secs_f64(); format!("Principal variation: {}\nExplored {} nodes to depth {}. 
MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", pv_string::(&self.pv[..], s), total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, - self.negamaxer.nodes_explored, throughput as usize) + self.negamaxer.stats.nodes_explored, throughput as usize) } } From 2163a93051e0a7d3e32387b090158f9b15938aa5 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 2 Nov 2022 11:34:31 -0700 Subject: [PATCH 114/162] Add basic stats to YBW via threadlocals. Currently only nodes/sec and mean branching factor. Effective branching factor would require somehow checkpointing stats across all threadlocals on each iteration. --- src/strategies/iterative.rs | 6 ++++ src/strategies/sync_util.rs | 64 ++++++++++++++++++++++++++++++++++--- src/strategies/ybw.rs | 30 +++++++++++++---- 3 files changed, 89 insertions(+), 11 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 7b04865..f44f135 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -279,6 +279,12 @@ impl Stats { self.total_generate_move_calls += 1; self.total_generated_moves += num_moves as u64; } + + pub(crate) fn add(&mut self, other: &Self) { + self.nodes_explored += other.nodes_explored; + self.total_generate_move_calls += other.total_generate_move_calls; + self.total_generated_moves += other.total_generated_moves; + } } pub(super) struct Negamaxer { diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs index 4b8434b..fd31c94 100644 --- a/src/strategies/sync_util.rs +++ b/src/strategies/sync_util.rs @@ -1,3 +1,4 @@ +use std::ops::{Deref, DerefMut}; use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; use std::sync::Arc; use std::thread::{sleep, spawn}; @@ -67,9 +68,11 @@ fn test_atomic_box() { assert_eq!(Some(&3), b.get()); } +// Safe API around lockfree threadlocals for rayon threadpools. +// Only safe when used from a single threadpool, as this crate does. 
pub(super) struct ThreadLocal { // Our owned reference to all the locals. - _locals: Vec, + locals: Vec, // Mutable reference from which each thread finds its local. ptr: *mut T, } @@ -85,12 +88,65 @@ impl ThreadLocal { locals.push(f()); } let ptr = locals.as_mut_ptr(); - Self { _locals: locals, ptr } + Self { locals, ptr } } pub(super) fn local_do(&self, f: F) { - if let Some(index) = rayon::current_thread_index() { - f(unsafe { self.ptr.add(index).as_mut().unwrap() }); + // It would be nice to keep a handle to the threadpool to ensure this + // thread is from only our pool, but the lifetimes seem too + // restrictive. + let index = rayon::current_thread_index().unwrap(); + f(unsafe { self.ptr.add(index).as_mut().unwrap() }); + } + + // With a &mut self, no other threads can be using it. + pub(super) fn do_all(&mut self, mut f: F) { + for local in self.locals.iter_mut() { + f(local); } } } + +#[test] +fn test_threadlocal() { + use rayon::prelude::*; + let pool = rayon::ThreadPoolBuilder::new().build().unwrap(); + let mut tls = ThreadLocal::::new(|| 0, &pool); + let count = 100000; + (0..count).into_par_iter().for_each(|_| tls.local_do(|x| *x += 1)); + let mut sum = 0; + tls.do_all(|x| sum += *x); + assert_eq!(sum, count); + + let result = std::panic::catch_unwind(|| { + // Invalid call from outside pool. + tls.local_do(|x| *x += 1); + }); + assert!(result.is_err()); +} + +// 64-bytes is a common cache line size. 
+#[repr(align(64))] +pub(super) struct CachePadded { + value: T, +} + +impl Default for CachePadded { + fn default() -> Self { + Self { value: T::default() } + } +} + +impl Deref for CachePadded { + type Target = T; + + fn deref(&self) -> &T { + &self.value + } +} + +impl DerefMut for CachePadded { + fn deref_mut(&mut self) -> &mut T { + &mut self.value + } +} diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 9c64a4b..302f300 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -9,8 +9,8 @@ extern crate rayon; use super::super::interface::*; use super::super::util::*; -use super::iterative::IterativeOptions; -use super::sync_util::{timeout_signal, ThreadLocal}; +use super::iterative::{IterativeOptions, Stats}; +use super::sync_util::{timeout_signal, CachePadded, ThreadLocal}; use super::table::*; use super::util::*; @@ -70,7 +70,7 @@ struct ParallelNegamaxer { opts: IterativeOptions, ybw_opts: YbwOptions, timeout: Arc, - // TODO: stats + stats: ThreadLocal>, move_pool: ThreadLocal::M>>, countermoves: ThreadLocal::M>>, pv: Mutex::M>>, @@ -93,6 +93,7 @@ where opts, ybw_opts, timeout, + stats: ThreadLocal::new(CachePadded::default, thread_pool), move_pool: ThreadLocal::new(MovePool::default, thread_pool), countermoves: ThreadLocal::new( || CounterMoves::new(opts.countermove_table, opts.countermove_history_table), @@ -180,7 +181,7 @@ where return None; } - //self.next_depth_nodes += 1; + self.stats.local_do(|stats| stats.explore_node()); if depth == 0 { // Evaluate quiescence search on leaf nodes. 
@@ -205,8 +206,7 @@ where let mut moves = Vec::new(); self.move_pool.local_do(|pool| moves = pool.alloc()); E::G::generate_moves(s, &mut moves); - //self.total_generate_move_calls += 1; - //self.total_generated_moves += moves.len() as u64; + self.stats.local_do(|stats| stats.generate_moves(moves.len())); if moves.is_empty() { self.move_pool.local_do(|pool| pool.free(moves)); return Some(WORST_EVAL); @@ -384,6 +384,16 @@ where } } +fn pretty_stats(stats: &Stats, start: Instant) -> String { + let mean_branching_factor = + stats.total_generated_moves as f64 / stats.total_generate_move_calls as f64; + let throughput = (stats.nodes_explored) as f64 / (Instant::now() - start).as_secs_f64(); + format!( + "Explored {} nodes. MBF={:.1}\n{} nodes/sec", + stats.nodes_explored, mean_branching_factor, throughput as usize + ) +} + pub struct ParallelYbw { max_depth: u8, max_time: Duration, @@ -442,7 +452,8 @@ where }; let best_value_move = { - let negamaxer = ParallelNegamaxer::new( + let start_time = Instant::now(); + let mut negamaxer = ParallelNegamaxer::new( self.opts, self.ybw_opts, self.eval.clone(), @@ -455,6 +466,11 @@ where .thread_pool .install(|| negamaxer.iterative_search(s.clone(), self.max_depth, false)); self.principal_variation = negamaxer.principal_variation(); + let mut stats = Stats::default(); + negamaxer.stats.do_all(|local| stats.add(local)); + if self.opts.verbose { + eprintln!("{}", pretty_stats(&stats, start_time)); + } value_move }; if let Some((best_move, value)) = best_value_move { From c978eac5af0a6230fad5bc79d32f83e0c368def6 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 3 Nov 2022 21:58:45 -0700 Subject: [PATCH 115/162] Simplify ThreadLocal implementation --- src/strategies/sync_util.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs index fd31c94..7a784e1 100644 --- a/src/strategies/sync_util.rs +++ b/src/strategies/sync_util.rs @@ -78,15 
+78,13 @@ pub(super) struct ThreadLocal { } // Values are only accessed from their individual threads and references do not leak. -unsafe impl Send for ThreadLocal {} +unsafe impl Send for ThreadLocal {} unsafe impl Sync for ThreadLocal {} impl ThreadLocal { pub(super) fn new T>(f: F, pool: &rayon::ThreadPool) -> Self { - let mut locals = Vec::new(); - for _ in 0..pool.current_num_threads() { - locals.push(f()); - } + let n = pool.current_num_threads(); + let mut locals = (0..n).map(|_| f()).collect::>(); let ptr = locals.as_mut_ptr(); Self { locals, ptr } } @@ -96,14 +94,13 @@ impl ThreadLocal { // thread is from only our pool, but the lifetimes seem too // restrictive. let index = rayon::current_thread_index().unwrap(); + assert!(index < self.locals.len()); f(unsafe { self.ptr.add(index).as_mut().unwrap() }); } // With a &mut self, no other threads can be using it. - pub(super) fn do_all(&mut self, mut f: F) { - for local in self.locals.iter_mut() { - f(local); - } + pub(super) fn do_all(&mut self, f: F) { + self.locals.iter_mut().for_each(f); } } From 07c7e75499f491c01bec5664f5b2c77e8c6ee626 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 3 Nov 2022 22:08:12 -0700 Subject: [PATCH 116/162] Reduce Evaluation to an i16. Not only is 64k values enough for anybody, but encouraging coarser evaluation functions leads to alpha-beta being more efficient. --- examples/connect4.rs | 6 ++++-- src/interface.rs | 4 ++-- src/strategies/sync_util.rs | 2 +- src/strategies/table.rs | 6 ++++-- src/strategies/ybw.rs | 4 ++-- tests/strategies.rs | 2 +- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 8a8e07b..45405eb 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -215,8 +215,10 @@ impl minimax::Evaluator for BasicEvaluator { let mut score = 0; // Bonus points for moves in the middle columns. 
for col in 2..5 { - score += ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; - score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as i32; + score += + ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as minimax::Evaluation; + score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() + as minimax::Evaluation; } // Count columns that cause immediate win. diff --git a/src/interface.rs b/src/interface.rs index 45ac397..6372f1c 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -3,14 +3,14 @@ /// An assessment of a game state from the perspective of the player whose turn it is to play. /// Higher values mean a more favorable state. /// A draw is defined as a score of zero. -pub type Evaluation = i32; +pub type Evaluation = i16; // These definitions ensure that they negate to each other, but it leaves // i32::MIN as a valid value less than WORST_EVAL. Don't use this value, and // any Strategy will panic when it tries to negate it. /// An absolutely wonderful outcome, e.g. a win. -pub const BEST_EVAL: Evaluation = i32::MAX; +pub const BEST_EVAL: Evaluation = i16::MAX; /// An absolutely disastrous outcome, e.g. a loss. pub const WORST_EVAL: Evaluation = -BEST_EVAL; diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs index 7a784e1..2dcae59 100644 --- a/src/strategies/sync_util.rs +++ b/src/strategies/sync_util.rs @@ -94,7 +94,7 @@ impl ThreadLocal { // thread is from only our pool, but the lifetimes seem too // restrictive. 
let index = rayon::current_thread_index().unwrap(); - assert!(index < self.locals.len()); + assert!(index < self.locals.len()); f(unsafe { self.ptr.add(index).as_mut().unwrap() }); } diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 70474cf..6db798e 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -14,6 +14,7 @@ pub(super) enum EntryFlag { } #[derive(Copy, Clone)] +#[repr(align(16))] pub(super) struct Entry { pub(super) high_hash: u32, pub(super) value: Evaluation, @@ -25,8 +26,8 @@ pub(super) struct Entry { #[test] fn test_entry_size() { - assert!(std::mem::size_of::>() <= 16); - assert!(std::mem::size_of::>() <= 16); + assert!(std::mem::size_of::>() <= 16); + assert!(std::mem::size_of::>() <= 16); } pub(super) fn high_bits(hash: u64) -> u32 { @@ -247,6 +248,7 @@ impl ConcurrentTable for RacyTable { } } +#[repr(align(16))] struct ConcurrentEntry { high_hash: AtomicU32, value: Evaluation, diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 302f300..82a1a1e 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -16,7 +16,7 @@ use super::util::*; use rayon::prelude::*; use std::cmp::max; -use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI16, Ordering}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; @@ -267,7 +267,7 @@ where } (best, best_move) } else { - let alpha = AtomicI32::new(alpha); + let alpha = AtomicI16::new(alpha); let best_move = Mutex::new(ValueMove::new(initial_value, first_move)); // Parallel search let result = moves[1..].par_iter().with_max_len(1).try_for_each(|&m| -> Option<()> { diff --git a/tests/strategies.rs b/tests/strategies.rs index aacd01d..269682c 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -98,7 +98,7 @@ impl minimax::Evaluator for RandomEvaluator { hasher.write_u64(b.pieces_to_move); let hash = hasher.finish(); // Use fewer bits so that we get some equal values. 
- (hash as minimax::Evaluation) >> 25 + (hash as minimax::Evaluation) >> 9 } } From 19d51f170da7d756020ef4f9be3b7de563ea3bc5 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 3 Nov 2022 22:27:19 -0700 Subject: [PATCH 117/162] Delete LazySMP. While it happily shared a lot of single-threaded negamax code with IterativeSearch, in the end YBW gives amazing speedups and LazySMP only sometimes breaks even. --- examples/connect4.rs | 8 +- src/lib.rs | 2 - src/strategies/lazy_smp.rs | 478 ------------------------------------- src/strategies/mod.rs | 2 - tests/strategies.rs | 23 +- 5 files changed, 3 insertions(+), 510 deletions(-) delete mode 100644 src/strategies/lazy_smp.rs diff --git a/examples/connect4.rs b/examples/connect4.rs index 45405eb..df811bb 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -275,16 +275,12 @@ fn main() { iterative.set_max_depth(12); let mut parallelybw = ParallelYbw::new(BasicEvaluator::default(), opts, YbwOptions::new()); parallelybw.set_max_depth(12); - let mut lazysmp = LazySmp::new(BasicEvaluator::default(), opts, LazySmpOptions::new()); - lazysmp.set_max_depth(12); - let mut strategies: [&mut dyn Strategy; 4] = - [&mut dumb, &mut iterative, &mut parallelybw, &mut lazysmp]; + let mut strategies: [&mut dyn Strategy; 3] = + [&mut dumb, &mut iterative, &mut parallelybw]; if std::env::args().any(|arg| arg == "parallel") { strategies.swap(1, 2); - } else if std::env::args().any(|arg| arg == "lazysmp") { - strategies.swap(1, 3); } let mut s = 0; diff --git a/src/lib.rs b/src/lib.rs index ecce62f..746ee1b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,8 +9,6 @@ pub use interface::{ }; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] -pub use strategies::lazy_smp::{LazySmp, LazySmpOptions}; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch}; pub use 
strategies::negamax::Negamax; pub use strategies::random::Random; diff --git a/src/strategies/lazy_smp.rs b/src/strategies/lazy_smp.rs deleted file mode 100644 index efac654..0000000 --- a/src/strategies/lazy_smp.rs +++ /dev/null @@ -1,478 +0,0 @@ -//! An implementation of parallelized Negamax via the Lazy Symmetric -//! MultiProcessing algorithm. -//! -//! This parallel algorithm minimizes cross-thread synchronization and -//! minimizes game state cloning, at the expense of doing more duplicative -//! work across different threads. - -extern crate num_cpus; -extern crate rand; - -use super::super::interface::*; -use super::iterative::{IterativeOptions, Negamaxer}; -use super::sync_util::timeout_signal; -use super::table::*; -use super::util::*; - -use rand::seq::SliceRandom; -use std::cmp::max; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::{Arc, Condvar, Mutex}; -use std::thread::spawn; -use std::time::{Duration, Instant}; - -/// Options to use for the iterative search engine. -#[derive(Clone, Copy)] -pub struct LazySmpOptions { - // Default is one per core. - num_threads: Option, - differing_depths: bool, - // TODO: optional bonus thread local TT? - // TODO: min_TT_depth? -} - -impl LazySmpOptions { - pub fn new() -> Self { - LazySmpOptions { num_threads: None, differing_depths: false } - } -} - -impl Default for LazySmpOptions { - fn default() -> Self { - Self::new() - } -} - -impl LazySmpOptions { - /// Set the total number of threads to use. Otherwise defaults to num_cpus. - pub fn with_num_threads(mut self, num_threads: usize) -> Self { - self.num_threads = Some(num_threads); - self - } - - /// Enables the helper threads to explore the tree at multiple depths simultaneously. 
- pub fn with_differing_depths(mut self) -> Self { - self.differing_depths = true; - self - } - - fn num_threads(self) -> usize { - self.num_threads.unwrap_or_else(num_cpus::get) - } -} - -#[derive(Clone)] -struct Search { - state: S, - depth: u8, - alpha: Evaluation, - beta: Evaluation, - timeout: Arc, -} - -// A directive to the helper threads. -enum Command { - Wait, - Exit, - Search(Search), -} - -struct SharedStats { - nodes_explored: AtomicU64, - generated_moves: AtomicU64, - generate_move_calls: AtomicU64, -} - -impl SharedStats { - fn new() -> Self { - Self { - nodes_explored: AtomicU64::new(0), - generated_moves: AtomicU64::new(0), - generate_move_calls: AtomicU64::new(0), - } - } - - fn reset(&self) { - self.nodes_explored.store(0, Ordering::SeqCst); - self.generated_moves.store(0, Ordering::SeqCst); - self.generate_move_calls.store(0, Ordering::SeqCst); - } - - fn update(&self, negamaxer: &mut Negamaxer) { - self.nodes_explored.fetch_add(negamaxer.stats.nodes_explored, Ordering::SeqCst); - self.generated_moves.fetch_add(negamaxer.stats.total_generated_moves, Ordering::SeqCst); - self.generate_move_calls - .fetch_add(negamaxer.stats.total_generate_move_calls, Ordering::SeqCst); - negamaxer.stats.reset(); - } - - fn reset_nodes_explored(&self) -> u64 { - self.nodes_explored.swap(0, Ordering::SeqCst) - } -} - -struct Helper -where - ::S: Clone, - ::M: Copy + Eq, -{ - negamaxer: Negamaxer::M>>>, - signal: Arc::S>>, - stats: Arc, - extra_depth: u8, -} - -impl Helper -where - ::S: Clone + Zobrist, - ::M: Copy + Eq, -{ - fn process(&mut self) { - let mut prev_hash: u64 = 0; - let mut prev_depth: u8 = 200; - let mut prev_alpha = 0; - let mut prev_beta = 0; - loop { - let mut search = { - let command = self.signal.command.lock().unwrap(); - // Stay waiting during Wait command or if we already completed Search command. 
- let command = self - .signal - .signal - .wait_while(command, |c| match *c { - Command::Exit => false, - Command::Wait => true, - Command::Search(ref search) => { - search.state.zobrist_hash() == prev_hash - && search.depth == prev_depth - && prev_alpha == search.alpha - && prev_beta == search.beta - } - }) - .unwrap(); - // Do command. - match *command { - Command::Exit => return, - Command::Wait => continue, - Command::Search(ref search) => search.clone(), - } - }; - prev_hash = search.state.zobrist_hash(); - prev_depth = search.depth; - prev_alpha = search.alpha; - prev_beta = search.beta; - - let depth = search.depth + self.extra_depth; - self.negamaxer.set_timeout(search.timeout.clone()); - let mut alpha = search.alpha; - let mut beta = search.beta; - self.negamaxer.table.check( - search.state.zobrist_hash(), - depth, - &mut None, - &mut alpha, - &mut beta, - ); - - self.negamaxer.countermoves.advance_generation(E::G::null_move(&search.state)); - // Randomize the first level of moves. - let mut moves = Vec::new(); - E::G::generate_moves(&search.state, &mut moves); - moves.shuffle(&mut rand::thread_rng()); - // Negamax search the rest. - for m in moves { - m.apply(&mut search.state); - if let Some(value) = - self.negamaxer.negamax(&mut search.state, Some(m), depth - 1, alpha, beta) - { - alpha = max(alpha, -value); - } else { - break; - } - if alpha >= beta { - break; - } - m.undo(&mut search.state); - } - - // Computation finished or interrupted, go back to sleep. 
- self.stats.update(&mut self.negamaxer); - } - } -} - -struct CommandSignal { - command: Mutex>, - signal: Condvar, -} - -impl CommandSignal -where - S: Clone, -{ - fn new() -> Self { - Self { command: Mutex::new(Command::Wait), signal: Condvar::new() } - } - - fn update(&self, new_command: Command) { - let mut command = self.command.lock().unwrap(); - if let Command::Search(ref search) = *command { - search.timeout.store(true, Ordering::SeqCst); - } - *command = new_command; - self.signal.notify_all(); - } - - fn wait(&self) { - self.update(Command::Wait); - } - - fn new_search(&self, state: &S, depth: u8, alpha: Evaluation, beta: Evaluation) { - self.update(Command::Search(Search { - state: state.clone(), - depth, - alpha, - beta, - timeout: Arc::new(AtomicBool::new(false)), - })); - } -} - -impl Drop for CommandSignal -where - S: Clone, -{ - fn drop(&mut self) { - self.update(Command::Exit); - } -} - -pub struct LazySmp -where - ::S: Clone + Zobrist, - ::M: Copy + Eq, -{ - max_depth: u8, - max_time: Duration, - table: Arc::M>>, - negamaxer: Negamaxer::M>>>, - signal: Arc::S>>, - - opts: IterativeOptions, - num_threads: usize, - - // Runtime stats for the last move generated. - prev_value: Evaluation, - // Maximum depth used to produce the move. - actual_depth: u8, - // Nodes explored at each depth. 
- nodes_explored: Vec, - shared_stats: Arc, - pv: Vec<::M>, - wall_time: Duration, -} - -impl LazySmp -where - ::S: Clone + Zobrist + Send, - ::M: Copy + Eq + Send, - E: Clone + Send, -{ - pub fn new(eval: E, opts: IterativeOptions, smp_opts: LazySmpOptions) -> LazySmp - where - E: 'static, - { - let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); - let stats = Arc::new(SharedStats::new()); - let signal = Arc::new(CommandSignal::new()); - let num_threads = smp_opts.num_threads(); - // start n-1 helper threads - for iter in 1..num_threads { - let table2 = table.clone(); - let eval2 = eval.clone(); - let opts2 = opts; - let signal2 = signal.clone(); - let stats2 = stats.clone(); - let extra_depth = if smp_opts.differing_depths { iter as u8 & 1 } else { 0 }; - spawn(move || { - let mut helper = Helper { - negamaxer: Negamaxer::new(table2, eval2, opts2), - signal: signal2, - stats: stats2, - extra_depth, - }; - helper.process(); - }); - } - let negamaxer = Negamaxer::new(table.clone(), eval, opts); - LazySmp { - max_depth: 99, - max_time: Duration::from_secs(5), - table, - negamaxer, - signal, - prev_value: 0, - opts, - num_threads, - actual_depth: 0, - nodes_explored: Vec::new(), - shared_stats: stats, - pv: Vec::new(), - wall_time: Duration::default(), - } - } - - #[doc(hidden)] - pub fn root_value(&self) -> Evaluation { - unclamp_value(self.prev_value) - } -} - -impl LazySmp -where - ::S: Clone + Zobrist, - ::M: Copy + Eq, -{ - /// Return a human-readable summary of the last move generation. 
- pub fn stats(&self, s: &mut ::S) -> String { - let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); - let mean_branching_factor = self.shared_stats.generated_moves.load(Ordering::SeqCst) as f64 - / self.shared_stats.generate_move_calls.load(Ordering::SeqCst) as f64; - let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) - .powf((self.actual_depth as f64 + 1.0).recip()); - let throughput = (total_nodes_explored + self.negamaxer.stats.nodes_explored) as f64 - / self.wall_time.as_secs_f64(); - format!("Principal variation: {}\nExplored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", - pv_string::(&self.pv[..], s), - total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, - self.negamaxer.stats.nodes_explored, throughput as usize) - } -} - -impl Strategy for LazySmp -where - ::S: Clone + Zobrist, - ::M: Copy + Eq, -{ - fn choose_move(&mut self, s: &::S) -> Option<::M> { - self.table.concurrent_advance_generation(); - // Reset stats. - self.shared_stats.reset(); - self.nodes_explored.clear(); - self.actual_depth = 0; - let start_time = Instant::now(); - // Start timer if configured. - self.negamaxer.set_timeout(if self.max_time == Duration::new(0, 0) { - Arc::new(AtomicBool::new(false)) - } else { - timeout_signal(self.max_time) - }); - - let root_hash = s.zobrist_hash(); - let mut s_clone = s.clone(); - let mut best_move = None; - let mut interval_start; - let mut maxxed = false; - // Store the moves so they can be reordered every iteration. - let mut moves = Vec::new(); - E::G::generate_moves(&s_clone, &mut moves); - // Start in a random order. 
- moves.shuffle(&mut rand::thread_rng()); - let mut moves = moves.into_iter().map(|m| ValueMove::new(0, m)).collect::>(); - - let mut depth = self.max_depth as u8 % self.opts.step_increment; - if depth == 0 { - depth = self.opts.step_increment; - } - while depth <= self.max_depth as u8 { - interval_start = Instant::now(); - if let Some(window) = self.opts.aspiration_window { - // First, parallel aspiration search to at least establish some bounds. - let mut alpha = self.prev_value.saturating_sub(window); - if alpha < WORST_EVAL { - alpha = WORST_EVAL; - } - let beta = self.prev_value.saturating_add(window); - self.signal.new_search(s, depth, alpha, beta); - - if self - .negamaxer - .aspiration_search(&mut s_clone, depth, self.prev_value, window) - .is_none() - { - // Timeout. - break; - } - if self.opts.verbose && !maxxed { - if let Some(entry) = self.table.lookup(root_hash) { - let end = Instant::now(); - let interval = end - interval_start; - eprintln!( - "LazySmp (threads={}) aspiration depth{:>2} took{:>5}ms; bounds{:>5} bestmove={}", - self.num_threads, - depth, - interval.as_millis(), - entry.bounds(), - move_id::(&mut s_clone, entry.best_move) - ); - interval_start = end; - } - } - } - - self.signal.new_search(s, depth, WORST_EVAL, BEST_EVAL); - - let value = self.negamaxer.search_and_reorder(&mut s_clone, &mut moves, depth); - if value.is_none() { - // Timeout. Return the best move from the previous depth. 
- break; - } - - let entry = self.table.lookup(root_hash).unwrap(); - best_move = entry.best_move; - - if self.opts.verbose && !maxxed { - let interval = Instant::now() - interval_start; - eprintln!( - "LazySmp (threads={}) fullsearch depth{:>2} took{:>5}ms; value{:>6} bestmove={}", - self.num_threads, - depth, - interval.as_millis(), - entry.value_string(), - move_id::(&mut s_clone, entry.best_move) - ); - if unclamp_value(value.unwrap()).abs() == BEST_EVAL { - maxxed = true; - } - } - - self.actual_depth = max(self.actual_depth, depth); - self.prev_value = entry.value; - depth += self.opts.step_increment; - self.table.populate_pv(&mut self.pv, &mut s_clone); - self.shared_stats.update(&mut self.negamaxer); - self.nodes_explored.push(self.shared_stats.reset_nodes_explored()); - } - self.signal.wait(); - self.wall_time = start_time.elapsed(); - if self.opts.verbose { - let mut s_clone = s.clone(); - eprintln!("{}", self.stats(&mut s_clone)); - } - best_move - } - - fn set_timeout(&mut self, max_time: Duration) { - self.max_time = max_time; - self.max_depth = 99; - } - - fn set_max_depth(&mut self, depth: u8) { - self.max_depth = depth; - self.max_time = Duration::new(0, 0); - } - - fn principal_variation(&self) -> Vec<::M> { - self.pv.clone() - } -} diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 175db2c..3b07a58 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -2,8 +2,6 @@ pub mod iterative; #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] -pub mod lazy_smp; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub mod mcts; pub mod negamax; pub mod random; diff --git a/tests/strategies.rs b/tests/strategies.rs index 269682c..860e9d5 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -190,20 +190,6 @@ fn compare_plain_negamax() { max_depth, b ); - - let mut lazysmp = - LazySmp::new(RandomEvaluator::default(), opt, LazySmpOptions::default()); - lazysmp.set_max_depth(max_depth); - let 
lazysmp_move = lazysmp.choose_move(&b).unwrap(); - let lazysmp_value = lazysmp.root_value(); - assert_eq!(value, lazysmp_value, "search depth={}\n{}", max_depth, b); - assert!( - plain_negamax.best_moves.contains(&lazysmp_move), - "bad move={:?}\nsearch depth={}\n{}", - lazysmp_move, - max_depth, - b - ); } } } @@ -242,14 +228,7 @@ fn compare_deep_negamax() { parallel.set_max_depth(max_depth); parallel.choose_move(&b).unwrap(); let parallel_value = parallel.root_value(); - assert_eq!(value, parallel_value, "search depth={}\n{}", max_depth, b); - - let mut lazysmp = - LazySmp::new(RandomEvaluator::default(), opt, LazySmpOptions::default()); - lazysmp.set_max_depth(max_depth); - lazysmp.choose_move(&b).unwrap(); - let lazysmp_value = lazysmp.root_value(); - assert_eq!(value, lazysmp_value, "search depth={} iter={}\n{}", max_depth, iter, b); + assert_eq!(value, parallel_value, "search iter={} depth={}\n{}", iter, max_depth, b); } } } From b99f5e8f4627ec0b5ecdcbceb15ea205a59bec2a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 3 Nov 2022 22:43:54 -0700 Subject: [PATCH 118/162] Rename Ybw stuff to Parallel. It's the only remaining or planned parallel implementation. 
--- benches/negamax.rs | 4 +- examples/connect4.rs | 6 +-- src/lib.rs | 2 +- src/strategies/ybw.rs | 100 ++++++++++++++++++++---------------------- tests/strategies.rs | 17 +++---- 5 files changed, 63 insertions(+), 66 deletions(-) diff --git a/benches/negamax.rs b/benches/negamax.rs index ab80cb5..51d2082 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -32,10 +32,10 @@ fn bench_iterative(b: &mut Bencher) { fn bench_parallel(b: &mut Bencher) { let board = connect4::Board::default(); b.iter(|| { - let mut s = ParallelYbw::new( + let mut s = ParallelSearch::new( connect4::BasicEvaluator::default(), IterativeOptions::new().with_table_byte_size(32_000), - YbwOptions::new(), + ParallelOptions::new(), ); s.set_max_depth(5); let m = s.choose_move(&board); diff --git a/examples/connect4.rs b/examples/connect4.rs index df811bb..85c7f10 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -273,11 +273,11 @@ fn main() { let mut iterative = IterativeSearch::new(BasicEvaluator::default(), opts.clone().with_aspiration_window(5)); iterative.set_max_depth(12); - let mut parallelybw = ParallelYbw::new(BasicEvaluator::default(), opts, YbwOptions::new()); - parallelybw.set_max_depth(12); + let mut parallel = ParallelSearch::new(BasicEvaluator::default(), opts, ParallelOptions::new()); + parallel.set_max_depth(12); let mut strategies: [&mut dyn Strategy; 3] = - [&mut dumb, &mut iterative, &mut parallelybw]; + [&mut dumb, &mut iterative, &mut parallel]; if std::env::args().any(|arg| arg == "parallel") { strategies.swap(1, 2); diff --git a/src/lib.rs b/src/lib.rs index 746ee1b..08f30a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,5 +13,5 @@ pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch}; pub use strategies::negamax::Negamax; pub use strategies::random::Random; #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] -pub use strategies::ybw::{ParallelYbw, YbwOptions}; +pub use strategies::ybw::{ParallelOptions, ParallelSearch}; pub use 
util::perft; diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 82a1a1e..d948e43 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -1,9 +1,9 @@ //! An implementation of iterative deeping, with each iteration executed in parallel. //! -//! This implementation uses the Young Brothers Wait Concept, which evaluates -//! the best guess move serially first, then parallelizes all other moves -//! using rayon. This tries to reduce redundant computation at the expense of -//! more board state clones and slightly more thread synchronization. +//! This implementation evaluates the best guess at each move first, then +//! parallelizes all other moves using rayon. +//! +//! This is based on the Young Brothers Wait Concept and CilkChess. extern crate rayon; @@ -22,25 +22,25 @@ use std::time::{Duration, Instant}; /// Options to use for the parallel search engine. #[derive(Clone, Copy)] -pub struct YbwOptions { +pub struct ParallelOptions { pub num_threads: Option, serial_cutoff_depth: u8, pub background_pondering: bool, } -impl YbwOptions { +impl ParallelOptions { pub fn new() -> Self { - YbwOptions { num_threads: None, serial_cutoff_depth: 1, background_pondering: false } + ParallelOptions { num_threads: None, serial_cutoff_depth: 1, background_pondering: false } } } -impl Default for YbwOptions { +impl Default for ParallelOptions { fn default() -> Self { Self::new() } } -impl YbwOptions { +impl ParallelOptions { /// Set the total number of threads to use. Otherwise defaults to num_cpus. 
pub fn with_num_threads(mut self, num_threads: usize) -> Self { self.num_threads = Some(num_threads); @@ -68,7 +68,7 @@ struct ParallelNegamaxer { table: Arc::M>>, eval: E, opts: IterativeOptions, - ybw_opts: YbwOptions, + par_opts: ParallelOptions, timeout: Arc, stats: ThreadLocal>, move_pool: ThreadLocal::M>>, @@ -83,7 +83,7 @@ where E: Clone + Sync + Send + 'static, { fn new( - opts: IterativeOptions, ybw_opts: YbwOptions, eval: E, + opts: IterativeOptions, par_opts: ParallelOptions, eval: E, table: Arc::M>>, timeout: Arc, thread_pool: &rayon::ThreadPool, ) -> Self { @@ -91,7 +91,7 @@ where table, eval, opts, - ybw_opts, + par_opts, timeout, stats: ThreadLocal::new(CachePadded::default, thread_pool), move_pool: ThreadLocal::new(MovePool::default, thread_pool), @@ -231,7 +231,7 @@ where let (best, best_move) = if alpha >= beta { // Skip search (initial_value, first_move) - } else if self.ybw_opts.serial_cutoff_depth >= depth { + } else if self.par_opts.serial_cutoff_depth >= depth { // Serial search let mut best = initial_value; let mut best_move = first_move; @@ -359,8 +359,8 @@ where if self.opts.verbose && !background { let interval = Instant::now() - interval_start; eprintln!( - "Ybw search (threads={}) depth{:>2} took{:>5}ms; returned{:>5}; bestmove {}", - self.ybw_opts.num_threads(), + "Parallel (threads={}) depth{:>2} took{:>5}ms; returned{:>5}; bestmove {}", + self.par_opts.num_threads(), depth, interval.as_millis(), entry.value_string(), @@ -394,7 +394,7 @@ fn pretty_stats(stats: &Stats, start: Instant) -> String { ) } -pub struct ParallelYbw { +pub struct ParallelSearch { max_depth: u8, max_time: Duration, @@ -407,15 +407,15 @@ pub struct ParallelYbw { thread_pool: rayon::ThreadPool, opts: IterativeOptions, - ybw_opts: YbwOptions, + par_opts: ParallelOptions, } -impl ParallelYbw { - pub fn new(eval: E, opts: IterativeOptions, ybw_opts: YbwOptions) -> ParallelYbw { +impl ParallelSearch { + pub fn new(eval: E, opts: IterativeOptions, par_opts: 
ParallelOptions) -> ParallelSearch { let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); - let num_threads = ybw_opts.num_threads(); + let num_threads = par_opts.num_threads(); let pool_builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads); - ParallelYbw { + ParallelSearch { max_depth: 99, max_time: Duration::from_secs(5), background_cancel: Arc::new(AtomicBool::new(false)), @@ -424,7 +424,7 @@ impl ParallelYbw { principal_variation: Vec::new(), thread_pool: pool_builder.build().unwrap(), opts, - ybw_opts, + par_opts, eval, } } @@ -435,7 +435,7 @@ impl ParallelYbw { } } -impl Strategy for ParallelYbw +impl Strategy for ParallelSearch where ::S: Clone + Zobrist + Send + Sync, ::M: Copy + Eq + Send + Sync, @@ -451,11 +451,11 @@ where timeout_signal(self.max_time) }; - let best_value_move = { + let (best_move, value) = { let start_time = Instant::now(); let mut negamaxer = ParallelNegamaxer::new( self.opts, - self.ybw_opts, + self.par_opts, self.eval.clone(), self.table.clone(), timeout, @@ -472,33 +472,29 @@ where eprintln!("{}", pretty_stats(&stats, start_time)); } value_move - }; - if let Some((best_move, value)) = best_value_move { - self.prev_value = value; - - if self.ybw_opts.background_pondering { - self.background_cancel = Arc::new(AtomicBool::new(false)); - // Create a separate negamaxer to have a dedicated cancel - // signal, and to allow the negamaxer to outlive this scope. - let negamaxer = ParallelNegamaxer::new( - self.opts, - self.ybw_opts, - self.eval.clone(), - self.table.clone(), - self.background_cancel.clone(), - &self.thread_pool, - ); - let mut state = s.clone(); - best_move.apply(&mut state); - // Launch in threadpool asynchronously. 
- self.thread_pool.spawn(move || { - negamaxer.iterative_search(state, 99, true); - }); - } - Some(best_move) - } else { - None + }?; + self.prev_value = value; + + if self.par_opts.background_pondering { + self.background_cancel = Arc::new(AtomicBool::new(false)); + // Create a separate negamaxer to have a dedicated cancel + // signal, and to allow the negamaxer to outlive this scope. + let negamaxer = ParallelNegamaxer::new( + self.opts, + self.par_opts, + self.eval.clone(), + self.table.clone(), + self.background_cancel.clone(), + &self.thread_pool, + ); + let mut state = s.clone(); + best_move.apply(&mut state); + // Launch in threadpool asynchronously. + self.thread_pool.spawn(move || { + negamaxer.iterative_search(state, 99, true); + }); } + Some(best_move) } fn set_timeout(&mut self, max_time: Duration) { @@ -516,7 +512,7 @@ where } } -impl Drop for ParallelYbw { +impl Drop for ParallelSearch { fn drop(&mut self) { self.background_cancel.store(true, Ordering::SeqCst); } diff --git a/tests/strategies.rs b/tests/strategies.rs index 860e9d5..ee25a69 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -178,15 +178,16 @@ fn compare_plain_negamax() { } let opt = IterativeOptions::new().with_table_byte_size(64000); - let mut ybw = ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); - ybw.set_max_depth(max_depth); - let ybw_move = ybw.choose_move(&b).unwrap(); - let ybw_value = ybw.root_value(); - assert_eq!(value, ybw_value, "search depth={}\n{}", max_depth, b); + let mut parallel = + ParallelSearch::new(RandomEvaluator::default(), opt, ParallelOptions::default()); + parallel.set_max_depth(max_depth); + let par_move = parallel.choose_move(&b).unwrap(); + let par_value = parallel.root_value(); + assert_eq!(value, par_value, "search depth={}\n{}", max_depth, b); assert!( - plain_negamax.best_moves.contains(&ybw_move), + plain_negamax.best_moves.contains(&par_move), "bad move={:?}\nsearch depth={}\n{}", - ybw_move, + par_move, 
max_depth, b ); @@ -224,7 +225,7 @@ fn compare_deep_negamax() { assert_eq!(value, mtdf_value, "search depth={}\n{}", max_depth, b); let mut parallel = - ParallelYbw::new(RandomEvaluator::default(), opt, YbwOptions::default()); + ParallelSearch::new(RandomEvaluator::default(), opt, ParallelOptions::default()); parallel.set_max_depth(max_depth); parallel.choose_move(&b).unwrap(); let parallel_value = parallel.root_value(); From ed68b4800d94a82419b47cde7ed23643f0b56c5b Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 4 Nov 2022 21:13:24 -0700 Subject: [PATCH 119/162] Add a frontpage to rustdoc, and give it the simplest example. --- src/lib.rs | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 08f30a7..4572b5f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,75 @@ -extern crate rand; +//! The `minimax` library provides interfaces for defining two-player +//! perfect-knowledge games, and strategies for choosing moves. +//! +//! Any game can be defined by implementing 2 traits: Game and Move. +//! ``` +//! use minimax::Strategy; +//! +//! // Stateless rules object. +//! struct TugOfWar; +//! // State of the game. +//! #[derive(Clone)] +//! struct War(i8); +//! // A move that a player can make. +//! #[derive(Copy, Clone, Debug, Eq, PartialEq)] +//! struct Tug(i8); +//! +//! impl minimax::Game for TugOfWar { +//! type S = War; +//! type M = Tug; +//! +//! fn generate_moves(s: &War, moves: &mut Vec) { +//! moves.push(Tug(-1)); +//! moves.push(Tug(1)); +//! } +//! +//! fn get_winner(state: &War) -> Option { +//! if state.0 > 9 { +//! Some(if state.0 % 2 == 0 { +//! minimax::Winner::PlayerJustMoved +//! } else { +//! minimax::Winner::PlayerToMove +//! }) +//! } else if state.0 < 9 { +//! Some(if state.0 % 2 == 0 { +//! minimax::Winner::PlayerToMove +//! } else { +//! minimax::Winner::PlayerJustMoved +//! }) +//! } else { +//! None +//! } +//! } +//! } +//! +//! 
impl minimax::Move for Tug { +//! type G = TugOfWar; +//! fn apply(&self, state: &mut War) { +//! state.0 += self.0 +//! } +//! fn undo(&self, state: &mut War) { +//! state.0 -= self.0 +//! } +//! } +//! +//! // To run the search we need an evaluator. +//! struct Eval; +//! impl minimax::Evaluator for Eval { +//! type G = TugOfWar; +//! fn evaluate(&self, state: &War) -> minimax::Evaluation { +//! if state.0 % 2 == 0 { +//! state.0 as minimax::Evaluation +//! } else { +//! -state.0 as minimax::Evaluation +//! } +//! } +//! } +//! +//! // Now we can use a simple Strategy to find a move from the initial state. +//! let start = War(0); +//! let mut strategy = minimax::Negamax::new(Eval{}, 3); +//! let best_move = strategy.choose_move(&start).unwrap(); +//! ``` pub mod interface; pub mod strategies; From 8f0a4d39317869633175034117954209a220ad8b Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 4 Nov 2022 21:19:36 -0700 Subject: [PATCH 120/162] Deflake mcts test. <1/1000 flakiness now --- tests/ttt.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ttt.rs b/tests/ttt.rs index ba23bcb..4adc765 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -30,9 +30,9 @@ fn test_ttt_negamax_vs_random_always_wins_or_draws() { #[test] fn test_ttt_mcts_vs_random_always_wins_or_draws() { let mut s1 = MonteCarloTreeSearch::new(MCTSOptions::default().with_num_threads(1)); - s1.set_max_rollouts(100); + s1.set_max_rollouts(500); let mut s2 = Random::new(); - for _ in 0..100 { + for _ in 0..10 { assert_ne!(battle_royale::(&mut s1, &mut s2), Some(1)); } } From ec4c9c603ff3218045544780962d1cc081874390 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 4 Nov 2022 21:32:44 -0700 Subject: [PATCH 121/162] Simplify wasm32 config checks. 
--- Cargo.toml | 4 ++-- src/lib.rs | 4 ++-- src/strategies/iterative.rs | 29 +++++++++++++++-------------- src/strategies/mod.rs | 6 +++--- src/strategies/util.rs | 4 ++-- src/util.rs | 8 ++++---- 6 files changed, 28 insertions(+), 27 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4c9640c..3f1c8f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,11 +14,11 @@ license = "MIT" instant = { version = "0.1", features = ["wasm-bindgen"] } rand = "0.8" -[target.'cfg(not(all(target_arch="wasm32", target_os="unknown")))'.dependencies] +[target.'cfg(not(target_arch="wasm32"))'.dependencies] num_cpus = "1.0" rayon = "^1.5" -[target.'cfg(all(target_arch="wasm32", target_os="unknown"))'.dependencies] +[target.'cfg(target_arch="wasm32")'.dependencies] getrandom = { version = "0.2", features = ["js"]} [dev-dependencies] diff --git a/src/lib.rs b/src/lib.rs index 4572b5f..ab08575 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,10 +79,10 @@ pub use interface::{ Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist, BEST_EVAL, WORST_EVAL, }; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch}; pub use strategies::negamax::Negamax; pub use strategies::random::Random; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] pub use strategies::ybw::{ParallelOptions, ParallelSearch}; pub use util::perft; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index f44f135..1cb4c47 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -6,7 +6,7 @@ use super::super::interface::*; use super::super::util::*; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] use super::sync_util::timeout_signal; use super::table::*; use super::util::*; @@ -14,9 +14,9 @@ 
use super::util::*; use instant::Instant; use rand::prelude::SliceRandom; use std::cmp::max; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] use std::sync::atomic::{AtomicBool, Ordering}; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] use std::sync::Arc; use std::time::Duration; @@ -280,6 +280,7 @@ impl Stats { self.total_generated_moves += num_moves as u64; } + #[cfg(not(target_arch = "wasm32"))] pub(crate) fn add(&mut self, other: &Self) { self.nodes_explored += other.nodes_explored; self.total_generate_move_calls += other.total_generate_move_calls; @@ -288,11 +289,11 @@ impl Stats { } pub(super) struct Negamaxer { - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[cfg(not(target_arch = "wasm32"))] timeout: Arc, - #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + #[cfg(target_arch = "wasm32")] deadline: Instant, - #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + #[cfg(target_arch = "wasm32")] timeout_counter: u32, pub(super) table: T, pub(super) countermoves: CounterMoves<::M>, @@ -310,11 +311,11 @@ where { pub(super) fn new(table: T, eval: E, opts: IterativeOptions) -> Self { Self { - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[cfg(not(target_arch = "wasm32"))] timeout: Arc::new(AtomicBool::new(false)), - #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + #[cfg(target_arch = "wasm32")] deadline: Instant::now(), - #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + #[cfg(target_arch = "wasm32")] timeout_counter: 1000, table, countermoves: CounterMoves::new(opts.countermove_table, opts.countermove_history_table), @@ -325,12 +326,12 @@ where } } - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[cfg(not(target_arch = "wasm32"))] pub(super) fn set_timeout(&mut self, timeout: Arc) { self.timeout = timeout; } - #[cfg(all(target_arch = "wasm32", target_os = 
"unknown"))] + #[cfg(target_arch = "wasm32")] fn reset_timeout(&mut self, duration: Duration) { self.timeout_counter = if duration == Duration::new(0, 0) { // Too high counter that never hits the maximum. @@ -340,7 +341,7 @@ where }; self.deadline = Instant::now() + duration; } - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[cfg(not(target_arch = "wasm32"))] fn reset_timeout(&mut self, duration: Duration) { self.set_timeout(if duration == Duration::new(0, 0) { Arc::new(AtomicBool::new(false)) @@ -349,7 +350,7 @@ where }); } - #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + #[cfg(target_arch = "wasm32")] fn timeout_check(&mut self) -> bool { self.timeout_counter += 1; if self.timeout_counter != 100 { @@ -358,7 +359,7 @@ where self.timeout_counter = 0; Instant::now() >= self.deadline } - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[cfg(not(target_arch = "wasm32"))] fn timeout_check(&mut self) -> bool { self.timeout.load(Ordering::Relaxed) } diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 3b07a58..6021134 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -1,14 +1,14 @@ //! Strategy implementations. 
pub mod iterative; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] pub mod mcts; pub mod negamax; pub mod random; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] pub mod ybw; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] mod sync_util; mod table; mod util; diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 0a6a1ed..f9df67a 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -82,7 +82,7 @@ impl ValueMove { Self { value, m } } - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[cfg(not(target_arch = "wasm32"))] pub(super) fn max(&mut self, value: Evaluation, m: M) { if value > self.value { self.value = value; @@ -90,7 +90,7 @@ impl ValueMove { } } - #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] + #[cfg(not(target_arch = "wasm32"))] pub(super) fn into_inner(self) -> (Evaluation, M) { (self.value, self.m) } diff --git a/src/util.rs b/src/util.rs index 5a932be..c4d5b0d 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,12 +1,12 @@ //! Utility functions for testing, and tests. 
-#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] extern crate rayon; use super::interface; use super::interface::{Game, Move}; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] use rayon::prelude::*; use std::default::Default; use std::time::Instant; @@ -61,7 +61,7 @@ impl MovePool { } } -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(not(target_arch = "wasm32"))] fn perft_recurse( pool: &mut MovePool, state: &mut G::S, depth: u8, single_thread_cutoff: u8, ) -> u64 @@ -106,7 +106,7 @@ where n } -#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +#[cfg(target_arch = "wasm32")] fn perft_recurse( pool: &mut MovePool, state: &mut G::S, depth: u8, single_thread_cutoff: u8, ) -> u64 From 40e527086f733348971ee5c6ac3da1f43b4d0efe Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 4 Nov 2022 21:34:06 -0700 Subject: [PATCH 122/162] Upgrade to 2021 edition rust. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3f1c8f1..8d884e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ version = "0.3.0" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" -edition = "2018" +edition = "2021" repository = "https://github.com/edre/minimax-rs" readme = "README.md" keywords = ["ai", "game", "minimax", "negamax"] From 1efd3167e862a97d66a2731da669b72445ce9928 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 4 Nov 2022 21:37:34 -0700 Subject: [PATCH 123/162] Release 0.4.0 * Enable wasm32 target. * Add countermove and history table to IterativeOptions. * Remove LazySMP and rename ParallelYbw to ParallelSearch. * Add various thread-local stats and upgrades to ParallelSearch. * Rework types for Random and MCTS strategy to make them less annoying to use. 
* Add example using third-party chess library. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8d884e7..2b3e03f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.3.0" +version = "0.4.0" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" From c62d4889d0ea2fc52b89de665cc08b1a73a40418 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 4 Nov 2022 21:43:41 -0700 Subject: [PATCH 124/162] Remove redundant license text from README. --- README.md | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/README.md b/README.md index 1fe1cfe..8cfefaa 100644 --- a/README.md +++ b/README.md @@ -24,25 +24,3 @@ evaluator. The `ttt` and `connect4` modules contain implementations of Tic-Tac-Toe and Connect Four, demonstrating how to use the game and evaluation interfaces. `test` shows how to use strategies. - -## License - - Copyright (c) 2015 Samuel Fredrickson - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. From b01282073a8119efe4761aa88cf1c0aab18302af Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 14 Mar 2023 09:50:19 -0700 Subject: [PATCH 125/162] Don't die if strategies are given a winning position. --- examples/connect4.rs | 2 +- src/lib.rs | 2 +- src/strategies/iterative.rs | 3 +++ src/strategies/negamax.rs | 3 +++ src/strategies/ybw.rs | 3 +++ tests/strategies.rs | 24 ++++++++++++++++++++++++ 6 files changed, 35 insertions(+), 2 deletions(-) diff --git a/examples/connect4.rs b/examples/connect4.rs index 85c7f10..803f1f4 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -86,7 +86,7 @@ impl Display for Board { #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Place { - col: u8, + pub col: u8, } impl Place { diff --git a/src/lib.rs b/src/lib.rs index ab08575..3c7cf3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,7 +30,7 @@ //! } else { //! minimax::Winner::PlayerToMove //! }) -//! } else if state.0 < 9 { +//! } else if state.0 < -9 { //! Some(if state.0 % 2 == 0 { //! minimax::Winner::PlayerToMove //! } else { diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 1cb4c47..42d620a 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -636,6 +636,9 @@ where ::M: Copy + Eq, { fn choose_move(&mut self, s: &::S) -> Option<::M> { + if E::G::get_winner(s).is_some() { + return None; + } self.negamaxer.table.advance_generation(); self.negamaxer.countermoves.advance_generation(E::G::null_move(s)); // Reset stats. 
diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 8b8815e..0b9bb71 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -72,6 +72,9 @@ where if self.max_depth == 0 { return None; } + if E::G::get_winner(s).is_some() { + return None; + } let mut best = WORST_EVAL; let mut moves = self.move_pool.alloc(); E::G::generate_moves(s, &mut moves); diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index d948e43..e8f1e05 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -442,6 +442,9 @@ where E: Clone + Sync + Send + 'static, { fn choose_move(&mut self, s: &::S) -> Option<::M> { + if E::G::get_winner(s).is_some() { + return None; + } // Cancel any ongoing background processing. self.background_cancel.store(true, Ordering::SeqCst); // Start timer if configured. diff --git a/tests/strategies.rs b/tests/strategies.rs index ee25a69..05e90d3 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -118,6 +118,30 @@ fn generate_random_state(depth: u8) -> connect4::Board { b } +#[test] +fn test_winning_position() { + let mut b = connect4::Board::default(); + connect4::Place { col: 2 }.apply(&mut b); + connect4::Place { col: 3 }.apply(&mut b); + connect4::Place { col: 2 }.apply(&mut b); + connect4::Place { col: 3 }.apply(&mut b); + connect4::Place { col: 2 }.apply(&mut b); + connect4::Place { col: 3 }.apply(&mut b); + connect4::Place { col: 2 }.apply(&mut b); + assert_eq!(Some(Winner::PlayerJustMoved), connect4::Game::get_winner(&b)); + + // Make sure none of the strategies die when given a winning position. 
+ assert_eq!(None, Negamax::new(RandomEvaluator, 4).choose_move(&b)); + + let opt = IterativeOptions::new(); + assert_eq!(None, IterativeSearch::new(RandomEvaluator::default(), opt).choose_move(&b)); + assert_eq!( + None, + ParallelSearch::new(RandomEvaluator::default(), opt, ParallelOptions::default()) + .choose_move(&b) + ); +} + #[test] fn compare_plain_negamax() { for _ in 0..100 { From 22e78b026e8183e208c6f8c8408954890a1a336f Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 14 Mar 2023 22:23:55 -0700 Subject: [PATCH 126/162] Use fancier atomics orderings. No effect on benchmarks. But it seems neater. --- src/strategies/table.rs | 13 +++++++------ src/strategies/ybw.rs | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 6db798e..aff6d96 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -244,7 +244,7 @@ impl ConcurrentTable for RacyTable { } fn concurrent_advance_generation(&self) { - self.generation.fetch_add(1, Ordering::SeqCst); + self.generation.fetch_add(1, Ordering::Relaxed); } } @@ -271,7 +271,7 @@ impl Table for LockfreeTable { fn lookup(&self, hash: u64) -> Option> { let index = (hash as usize) & self.mask; let entry = &self.table[index]; - let table_hash = entry.high_hash.load(Ordering::SeqCst); + let table_hash = entry.high_hash.load(Ordering::Acquire); if high_bits(hash) | 1 == table_hash | 1 { // Copy contents let ret = Some(Entry { @@ -329,9 +329,10 @@ impl ConcurrentTable for LockfreeTable { let table_gen = self.generation.load(Ordering::Relaxed); let index = (hash as usize) & self.mask; let entry = &self.table[index]; + // TODO: some not-totally racy reads of generation and depth if entry.generation != table_gen || entry.depth <= depth { // Set hash to sentinel value during write. 
- let x = entry.high_hash.load(Ordering::SeqCst); + let x = entry.high_hash.load(Ordering::Acquire); if x == Self::WRITING_SENTINEL { // Someone's already writing, just forget it. return; @@ -342,7 +343,7 @@ impl ConcurrentTable for LockfreeTable { .compare_exchange_weak( x, Self::WRITING_SENTINEL, - Ordering::SeqCst, + Ordering::Acquire, Ordering::Relaxed, ) .is_err() @@ -370,12 +371,12 @@ impl ConcurrentTable for LockfreeTable { } else { high_bits(hash) }; - entry.high_hash.store(new_hash, Ordering::SeqCst); + entry.high_hash.store(new_hash, Ordering::Release); } } fn concurrent_advance_generation(&self) { - self.generation.fetch_add(1, Ordering::SeqCst); + self.generation.fetch_add(1, Ordering::Relaxed); } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index e8f1e05..3b26f62 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -446,7 +446,7 @@ where return None; } // Cancel any ongoing background processing. - self.background_cancel.store(true, Ordering::SeqCst); + self.background_cancel.store(true, Ordering::Relaxed); // Start timer if configured. let timeout = if self.max_time == Duration::new(0, 0) { Arc::new(AtomicBool::new(false)) @@ -517,6 +517,6 @@ where impl Drop for ParallelSearch { fn drop(&mut self) { - self.background_cancel.store(true, Ordering::SeqCst); + self.background_cancel.store(true, Ordering::Relaxed); } } From 37e53b3fdab368e3623b248e23c47f1376de6427 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 10:18:26 -0700 Subject: [PATCH 127/162] Fix clippy lints. --- src/strategies/iterative.rs | 4 ++-- src/strategies/ybw.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 42d620a..07f1645 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -661,11 +661,11 @@ where let mut moves = moves.into_iter().map(|m| ValueMove::new(0, m)).collect::>(); // Start at 1 or 2 to hit the max depth. 
- let mut depth = self.max_depth as u8 % self.opts.step_increment; + let mut depth = self.max_depth % self.opts.step_increment; if depth == 0 { depth = self.opts.step_increment; } - while depth <= self.max_depth as u8 { + while depth <= self.max_depth { interval_start = Instant::now(); let search = if self.opts.mtdf { self.mtdf(&mut s_clone, depth, self.prev_value) diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 3b26f62..d05a237 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -335,7 +335,7 @@ where if depth == 0 { depth = self.opts.step_increment; } - while depth <= max_depth as u8 { + while depth <= max_depth { interval_start = Instant::now(); if self.negamax(&mut state, None, depth, WORST_EVAL, BEST_EVAL).is_none() { // Timeout. Return the best move from the previous depth. From d5ba3c0c4bfb46e5974ef22f2a48a7e80290ec54 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 11:55:25 -0700 Subject: [PATCH 128/162] Fix null move pruning. Apparently it just always pruned? How did this not turn out completely bogus results in nokamute testing. --- src/strategies/iterative.rs | 4 +++- src/strategies/ybw.rs | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 07f1645..6df4585 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -380,7 +380,9 @@ where let value = -self.negamax(s, None, depth - depth_reduction, -beta, -beta + 1)?; null_move.undo(s); // is the result still so good that we shouldn't bother with a full search? - return Some(value); + if value >= beta { + return Some(value); + } } } // If we didn't check, return a low value that won't trigger beta cutoff. 
diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index d05a237..6613d2d 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -123,7 +123,9 @@ where let value = -self.negamax(s, None, depth - depth_reduction, -beta, -beta + 1)?; null_move.undo(s); // is the result still so good that we shouldn't bother with a full search? - return Some(value); + if value >= beta { + return Some(value); + } } } // If we didn't check, return a low value that won't trigger beta cutoff. From 0b30ae6bf4197490a863a816656be5bed3188de1 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 19 Mar 2023 06:49:11 -0700 Subject: [PATCH 129/162] Experimental unified Game trait. --- src/interface.rs | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/interface.rs b/src/interface.rs index 6372f1c..2e56a86 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -153,6 +153,76 @@ pub trait Game: Sized { } } +/// An alternate system for defining a game. +pub trait UniGame { + /// The type of the game state. + type S; + /// The type of game moves. + type M: Copy; + + /// Generate moves at the given state. + fn generate_moves(state: &Self::S, moves: &mut Vec); + + /// Apply a move to get a new state. + /// This method supports two different implementation strategies: + /// + /// 1) Games with large state that want to update in place. + /// ``` + /// struct BigBoard([u8; 4096]); + /// struct BigMove(u16); + /// fn apply(state: &mut BigBoard, m: &BigMove) -> Option { + /// state.0[m.0 as usize] += 1; + /// None + /// } + /// fn undo(state: &mut BigBoard, m: &BigMove) { + /// state.0[m.0 as usize] -= 1; + /// } + /// ``` + /// + /// 2) Games with small state that don't want to implement undo. 
+ /// ``` + /// struct SmallBoard(u64); + /// struct SmallMove(u8); + /// fn apply(state: &mut SmallBoard, m: &SmallMove) -> Option { + /// Some(SmallBoard(state.0 | (1< Option; + fn undo(_state: &mut Self::S, _m: &Self::M) {} + + /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, + /// `Some(Draw)` if the state is terminal without a winner, and `None` if + /// the state is non-terminal. + fn get_winner(state: &Self::S) -> Option; + + /// Hash of the game state. + /// Expected to be pre-calculated and cheaply updated with each apply. + fn zobrist_hash(_state: &Self::S) -> u64 { + unimplemented!("game has not implemented zobrist hash"); + } + + /// Optional method to return a move that does not change the board state. + /// This does not need to be a legal move from this position, but it is + /// used in some strategies to reject a position early if even passing gives + /// a good position for the opponent. + fn null_move(_state: &Self::S) -> Option { + None + } + + /// Return a human-readable notation for this move in this game state. + fn notation(_state: &Self::S, _move: &Self::M) -> Option { + None + } + /// Return a small index for this move for position-independent tables. + fn table_index(_: &Self::M) -> u16 { + 0 + } + /// Maximum index value. + fn max_table_index() -> u16 { + 0 + } +} + /// Defines a method of choosing a move for the current player. pub trait Strategy { fn choose_move(&mut self, state: &G::S) -> Option; From 9c756090093ed1908fbbccdd648d77e531e09b3f Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Sun, 19 Mar 2023 07:22:56 -0700 Subject: [PATCH 130/162] Full prototype of experimental unified Game trait. 
--- src/interface.rs | 12 +++++ src/lib.rs | 4 +- tests/strategies.rs | 112 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 3 deletions(-) diff --git a/src/interface.rs b/src/interface.rs index 2e56a86..374826e 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -65,6 +65,14 @@ pub trait Evaluator { } } +pub trait UniEval { + /// The type of game that can be evaluated. + type G: UniGame; + /// Evaluate the non-terminal state from the persective of the player to + /// move next. + fn evaluate(&self, s: &::S) -> Evaluation; +} + /// Defines how a move affects the game state. /// /// A move is able to change initial `Game` state, as well as revert the state. @@ -223,6 +231,10 @@ pub trait UniGame { } } +pub trait UniStrat { + fn choose_move(&mut self, state: &G::S) -> Option; +} + /// Defines a method of choosing a move for the current player. pub trait Strategy { fn choose_move(&mut self, state: &G::S) -> Option; diff --git a/src/lib.rs b/src/lib.rs index 3c7cf3c..fa912ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,9 +75,7 @@ pub mod interface; pub mod strategies; pub mod util; -pub use interface::{ - Evaluation, Evaluator, Game, Move, Strategy, Winner, Zobrist, BEST_EVAL, WORST_EVAL, -}; +pub use interface::*; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; #[cfg(not(target_arch = "wasm32"))] pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch}; diff --git a/tests/strategies.rs b/tests/strategies.rs index 05e90d3..7bd5d27 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -80,6 +80,118 @@ where } } +pub struct UniNegamax { + depth: u8, + root_value: Evaluation, + // All moves tied with the best valuation. 
+ best_moves: Vec<::M>, + eval: E, +} + +impl UniNegamax { + pub fn new(eval: E, depth: u8) -> UniNegamax { + UniNegamax { depth: depth, root_value: 0, best_moves: Vec::new(), eval } + } + + fn negamax(&self, s: &mut ::S, depth: u8) -> Evaluation + where + <::G as UniGame>::M: Copy, + { + if let Some(winner) = E::G::get_winner(s) { + return winner.evaluate(); + } + if depth == 0 { + return self.eval.evaluate(s); + } + let mut moves = Vec::new(); + ::G::generate_moves(s, &mut moves); + let mut best = WORST_EVAL; + for m in moves.iter() { + { + let mut x = ::G::apply(s, &m); + let mut new = x.as_mut().unwrap_or(s); + let value = -self.negamax(&mut new, depth - 1); + best = max(best, value); + } + ::G::undo(s, m); + } + best + } +} + +impl UniStrat for UniNegamax +where + ::S: Clone, + ::M: Copy, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + let mut moves = Vec::new(); + E::G::generate_moves(s, &mut moves); + + self.best_moves.clear(); + let mut best_value = WORST_EVAL; + let mut s_clone = s.clone(); + for &m in moves.iter() { + let value = { + let s = &mut s_clone; + let mut x = ::G::apply(s, &m); + let mut new = x.as_mut().unwrap_or(s); + -self.negamax(&mut new, self.depth - 1) + }; + ::G::undo(&mut s_clone, &m); + if value == best_value { + self.best_moves.push(m); + } else if value > best_value { + best_value = value; + self.best_moves.clear(); + self.best_moves.push(m); + } + } + self.root_value = best_value; + self.best_moves.first().map(|m| *m) + } +} + +#[derive(Copy, Clone)] +struct TugBoard(i8); +#[derive(Copy, Clone)] +struct TugMove(i8); +struct TugGame; + +impl minimax::UniGame for TugGame { + type S = TugBoard; + type M = TugMove; + + fn generate_moves(_b: &TugBoard, moves: &mut Vec) { + moves.push(TugMove(1)); + moves.push(TugMove(-1)); + } + + fn apply(b: &mut TugBoard, m: &TugMove) -> Option { + Some(TugBoard(b.0 + m.0)) + } + + fn get_winner(_b: &TugBoard) -> Option { + None + } +} + +#[derive(Clone)] +struct UniRandom; + +impl Default 
for UniRandom { + fn default() -> Self { + Self {} + } +} + +impl minimax::UniEval for UniRandom { + type G = TugGame; + fn evaluate(&self, b: &TugBoard) -> minimax::Evaluation { + b.0 as minimax::Evaluation + } +} + #[derive(Clone)] struct RandomEvaluator; From 10cca6526bd42690b2a4d59191fd46cf1db20072 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 10:16:26 -0700 Subject: [PATCH 131/162] Convenience function for applying moves for either implementation. --- tests/strategies.rs | 47 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/tests/strategies.rs b/tests/strategies.rs index 7bd5d27..a3c8011 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -80,6 +80,42 @@ where } } +struct AppliedMove<'a, G: UniGame> { + old: &'a mut ::S, + new: Option<::S>, + m: ::M, +} + +impl<'a, G: UniGame> std::ops::Deref for AppliedMove<'a, G> { + type Target = ::S; + fn deref(&self) -> &::S { + self.new.as_ref().unwrap_or(self.old) + } +} + +impl<'a, G: UniGame> std::ops::DerefMut for AppliedMove<'a, G> { + fn deref_mut(&mut self) -> &mut ::S { + self.new.as_mut().unwrap_or(self.old) + } +} + +impl<'a, G: UniGame> Drop for AppliedMove<'a, G> { + fn drop(&mut self) { + ::undo(self.old, &self.m) + } +} + +impl<'a, G: UniGame> AppliedMove<'a, G> { + fn new(old: &'a mut ::S, m: ::M) -> Self { + let new = ::apply(old, &m); + AppliedMove { old, new, m } + } + + fn get(&mut self) -> &mut ::S { + self.new.as_mut().unwrap_or(self.old) + } +} + pub struct UniNegamax { depth: u8, root_value: Evaluation, @@ -108,12 +144,10 @@ impl UniNegamax { let mut best = WORST_EVAL; for m in moves.iter() { { - let mut x = ::G::apply(s, &m); - let mut new = x.as_mut().unwrap_or(s); - let value = -self.negamax(&mut new, depth - 1); + let mut new = AppliedMove::::new(s, *m); + let value = -self.negamax(new.get(), depth - 1); best = max(best, value); } - ::G::undo(s, m); } best } @@ -133,12 +167,9 @@ where let mut s_clone 
= s.clone(); for &m in moves.iter() { let value = { - let s = &mut s_clone; - let mut x = ::G::apply(s, &m); - let mut new = x.as_mut().unwrap_or(s); + let mut new = AppliedMove::::new(&mut s_clone, m); -self.negamax(&mut new, self.depth - 1) }; - ::G::undo(&mut s_clone, &m); if value == best_value { self.best_moves.push(m); } else if value > best_value { From de23fa55182ab8a93bb3280734c1d08417dcf7fd Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 15:43:24 -0700 Subject: [PATCH 132/162] Refactor everything to the new unified Game trait. Unifies Game, Move, Zobrist traits into one trait. Unifies interface for mutatey games and copy-happy games. --- examples/chess/src/main.rs | 64 ++++--------- examples/connect4.rs | 47 ++++------ examples/ttt.rs | 28 +++--- src/interface.rs | 108 +++------------------- src/lib.rs | 10 +- src/strategies/iterative.rs | 42 ++++----- src/strategies/mcts.rs | 10 +- src/strategies/negamax.rs | 10 +- src/strategies/table.rs | 45 ++++----- src/strategies/util.rs | 32 +++---- src/strategies/ybw.rs | 59 ++++++------ src/util.rs | 64 ++++++++++--- tests/strategies.rs | 176 ++++-------------------------------- 13 files changed, 226 insertions(+), 469 deletions(-) diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs index de5a2cd..e357a91 100644 --- a/examples/chess/src/main.rs +++ b/examples/chess/src/main.rs @@ -1,64 +1,39 @@ extern crate chess; extern crate minimax; -use minimax::{Game, Move, Strategy}; +use chess::{Board, BoardStatus, ChessMove, MoveGen}; +use minimax::{Game, Strategy}; struct Chess; -// Using newtypes to get external chess impl to implement minimax traits. 
-#[derive(Clone)] -struct Board { - history: Vec, -} -#[derive(Copy, Clone, Eq, PartialEq)] -struct ChessMove(chess::ChessMove); - -impl Board { - fn new() -> Self { - Self { history: vec![chess::Board::default()] } - } - fn board(&self) -> &chess::Board { - self.history.last().unwrap() - } -} - -impl minimax::Zobrist for Board { - fn zobrist_hash(&self) -> u64 { - self.board().get_hash() - } -} - impl minimax::Game for Chess { type S = Board; type M = ChessMove; fn generate_moves(b: &Board, moves: &mut Vec) { - for m in chess::MoveGen::new_legal(b.board()) { - moves.push(ChessMove(m)); + for m in MoveGen::new_legal(b) { + moves.push(m); } } fn get_winner(b: &Board) -> Option { - match b.board().status() { - chess::BoardStatus::Ongoing => None, - chess::BoardStatus::Stalemate => Some(minimax::Winner::Draw), - chess::BoardStatus::Checkmate => Some(minimax::Winner::PlayerJustMoved), + match b.status() { + BoardStatus::Ongoing => None, + BoardStatus::Stalemate => Some(minimax::Winner::Draw), + BoardStatus::Checkmate => Some(minimax::Winner::PlayerJustMoved), } } -} -impl minimax::Move for ChessMove { - type G = Chess; - fn apply(&self, b: &mut Board) { - b.history.push(b.board().make_move_new(self.0)); + fn apply(b: &mut Board, m: &ChessMove) -> Option { + Some(b.make_move_new(*m)) } - fn undo(&self, b: &mut Board) { - b.history.pop(); + fn zobrist_hash(b: &Board) -> u64 { + b.get_hash() } - fn notation(&self, _b: &Board) -> Option { - Some(format!("{}", self.0)) + fn notation(_b: &Board, m: &ChessMove) -> Option { + Some(format!("{}", m)) } } @@ -67,8 +42,7 @@ struct Evaluator; impl minimax::Evaluator for Evaluator { type G = Chess; - fn evaluate(&self, b: &Board) -> minimax::Evaluation { - let board = b.board(); + fn evaluate(&self, board: &Board) -> minimax::Evaluation { let mut score = 0; for sq in 0..64 { let sq = unsafe { chess::Square::new(sq) }; @@ -93,16 +67,16 @@ impl minimax::Evaluator for Evaluator { } fn main() { - let mut b = Board::new(); + let mut b = 
Board::default(); let opts = minimax::IterativeOptions::new().verbose(); let mut strategy = minimax::IterativeSearch::new(Evaluator::default(), opts); strategy.set_timeout(std::time::Duration::from_secs(1)); while Chess::get_winner(&b).is_none() { - println!("{}", b.board()); + println!("{}", b); match strategy.choose_move(&b) { - Some(m) => m.apply(&mut b), + Some(m) => b = Chess::apply(&mut b, &m).unwrap(), None => break, } } - println!("Checkmate {:?}", b.board().side_to_move()); + println!("Checkmate {:?}", b.side_to_move()); } diff --git a/examples/connect4.rs b/examples/connect4.rs index 803f1f4..ca43849 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -52,12 +52,6 @@ impl Default for Board { } } -impl minimax::Zobrist for Board { - fn zobrist_hash(&self) -> u64 { - self.hash - } -} - impl Display for Board { fn fmt(&self, f: &mut Formatter) -> Result { let red_pieces = @@ -95,29 +89,6 @@ impl Place { } } -impl minimax::Move for Place { - type G = Game; - fn apply(&self, b: &mut Board) { - let col = (b.all_pieces >> self.col_shift()) & COL_MASK; - let new_piece = (col + 1) << self.col_shift(); - // Swap colors - b.pieces_to_move ^= b.all_pieces; - b.all_pieces |= new_piece; - b.num_moves += 1; - b.update_hash(new_piece); - } - - fn undo(&self, b: &mut Board) { - let col = (b.all_pieces >> self.col_shift()) & COL_MASK; - let prev_piece = (col ^ (col >> 1)) << self.col_shift(); - b.all_pieces &= !prev_piece; - // Swap colors - b.pieces_to_move ^= b.all_pieces; - b.update_hash(prev_piece); - b.num_moves -= 1; - } -} - pub struct Game; impl minimax::Game for Game { @@ -156,6 +127,22 @@ impl minimax::Game for Game { None } } + + fn apply(b: &mut Board, place: &Place) -> Option { + let mut b = b.clone(); + let col = (b.all_pieces >> place.col_shift()) & COL_MASK; + let new_piece = (col + 1) << place.col_shift(); + // Swap colors + b.pieces_to_move ^= b.all_pieces; + b.all_pieces |= new_piece; + b.num_moves += 1; + b.update_hash(new_piece); + Some(b) 
+ } + + fn zobrist_hash(b: &Board) -> u64 { + b.hash + } } pub struct DumbEvaluator; @@ -291,7 +278,7 @@ fn main() { Some(m) => { let color = if b.reds_move() { "Red" } else { "Yellow" }; println!("{} piece in column {}", color, m.col + 1); - m.apply(&mut b) + b = self::Game::apply(&mut b, &m).unwrap(); } None => break, } diff --git a/examples/ttt.rs b/examples/ttt.rs index e282e27..6b7cc67 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -151,6 +151,16 @@ impl minimax::Game for Game { None } } + + fn apply(b: &mut Board, m: &Place) -> Option { + b.squares[m.i as usize] = b.to_move; + b.to_move = b.to_move.invert(); + None + } + fn undo(b: &mut Board, m: &Place) { + b.squares[m.i as usize] = Square::Empty; + b.to_move = b.to_move.invert(); + } } #[derive(Copy, Clone, PartialEq, Eq)] @@ -164,18 +174,6 @@ impl Display for Place { } } -impl minimax::Move for Place { - type G = Game; - fn apply(&self, b: &mut Board) { - b.squares[self.i as usize] = b.to_move; - b.to_move = b.to_move.invert(); - } - fn undo(&self, b: &mut Board) { - b.squares[self.i as usize] = Square::Empty; - b.to_move = b.to_move.invert(); - } -} - pub struct Evaluator; impl Default for Evaluator { @@ -239,7 +237,7 @@ impl minimax::Evaluator for Evaluator { fn main() { use minimax::strategies::negamax::Negamax; - use minimax::{Game, Move, Strategy}; + use minimax::{Game, Strategy}; let mut b = Board::default(); let mut strategies = @@ -249,9 +247,9 @@ fn main() { println!("{}", b); let ref mut strategy = strategies[s]; match strategy.choose_move(&mut b) { - Some(m) => m.apply(&mut b), + Some(m) => self::Game::apply(&mut b, &m), None => break, - } + }; s = 1 - s; } println!("{}", b); diff --git a/src/interface.rs b/src/interface.rs index 374826e..fd65548 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -6,7 +6,7 @@ pub type Evaluation = i16; // These definitions ensure that they negate to each other, but it leaves -// i32::MIN as a valid value less than WORST_EVAL. 
Don't use this value, and +// i16::MIN as a valid value less than WORST_EVAL. Don't use this value, and // any Strategy will panic when it tries to negate it. /// An absolutely wonderful outcome, e.g. a win. @@ -41,61 +41,7 @@ pub trait Evaluator { // immediately. } - /// After generating moves, reorder them to explore the most promising first. - /// The default implementation evaluates all thes game states and sorts highest Evaluation first. - fn reorder_moves(&self, s: &mut ::S, moves: &mut [::M]) - where - ::M: Copy, - { - let mut evals = Vec::with_capacity(moves.len()); - for &m in moves.iter() { - m.apply(s); - let eval = if let Some(winner) = Self::G::get_winner(s) { - -winner.evaluate() - } else { - -self.evaluate(s) - }; - evals.push((eval, m)); - m.undo(s); - } - evals.sort_by_key(|eval| eval.0); - for (m, eval) in moves.iter_mut().zip(evals) { - *m = eval.1; - } - } -} - -pub trait UniEval { - /// The type of game that can be evaluated. - type G: UniGame; - /// Evaluate the non-terminal state from the persective of the player to - /// move next. - fn evaluate(&self, s: &::S) -> Evaluation; -} - -/// Defines how a move affects the game state. -/// -/// A move is able to change initial `Game` state, as well as revert the state. -/// This allows the game tree to be searched with a constant amount of space. -pub trait Move { - /// The type of game that the move affects. - type G: Game; - /// Change the state of `S` so that the move is applied. - fn apply(&self, state: &mut ::S); - /// Revert the state of `S` so that the move is undone. - fn undo(&self, state: &mut ::S); - /// Return a human-readable notation for this move in this game state. - fn notation(&self, _state: &::S) -> Option { - None - } - /// Return a small index for this move for position-independent tables. - fn table_index(&self) -> u16 { - 0 - } - /// Maximum index value. 
- fn max_table_index() -> u16 { - 0 - } + // TODO reorder moves by assigning value to each state and combining with countermoves table etc. } /// The result of playing a game until it finishes. @@ -123,46 +69,15 @@ impl Winner { } } -/// An optional trait for game state types to support hashing. -/// -/// Strategies that cache things by game state require this. -pub trait Zobrist { - /// Hash of the game position. - /// - /// Expected to be pre-calculated and cheaply updated with each apply or - /// undo. - fn zobrist_hash(&self) -> u64; -} - /// Defines the rules for a two-player, perfect-knowledge game. /// /// A game ties together types for the state and moves, generates the possible /// moves from a particular state, and determines whether a state is terminal. +/// +/// This is meant to be defined on an empty newtype so that a game engine can +/// be implemented in a separate crate without having to know about these +/// `minimax` traits. pub trait Game: Sized { - /// The type of the game state. - type S; - /// The type of game moves. - type M: Move; - - /// Generate moves at the given state. - fn generate_moves(state: &Self::S, moves: &mut Vec); - - /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, - /// `Some(Draw)` if the state is terminal without a winner, and `None` if - /// the state is non-terminal. - fn get_winner(state: &Self::S) -> Option; - - /// Optional method to return a move that does not change the board state. - /// This does not need to be a legal move from this position, but it is - /// used in some strategies to reject a position early if even passing gives - /// a good position for the opponent. - fn null_move(_state: &Self::S) -> Option { - None - } -} - -/// An alternate system for defining a game. -pub trait UniGame { /// The type of the game state. type S; /// The type of game moves. 
@@ -172,7 +87,10 @@ pub trait UniGame { fn generate_moves(state: &Self::S, moves: &mut Vec); /// Apply a move to get a new state. - /// This method supports two different implementation strategies: + /// + /// If the method returns a new state, the caller should use that. If the + /// method returns None, the caller should use the original. + /// This enables two different implementation strategies: /// /// 1) Games with large state that want to update in place. /// ``` @@ -196,6 +114,8 @@ pub trait UniGame { /// } /// ``` fn apply(state: &mut Self::S, m: &Self::M) -> Option; + + /// Undo mutation done in apply, if any. fn undo(_state: &mut Self::S, _m: &Self::M) {} /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, @@ -231,10 +151,6 @@ pub trait UniGame { } } -pub trait UniStrat { - fn choose_move(&mut self, state: &G::S) -> Option; -} - /// Defines a method of choosing a move for the current player. pub trait Strategy { fn choose_move(&mut self, state: &G::S) -> Option; diff --git a/src/lib.rs b/src/lib.rs index fa912ae..bf9263b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,15 +40,9 @@ //! None //! } //! } -//! } //! -//! impl minimax::Move for Tug { -//! type G = TugOfWar; -//! fn apply(&self, state: &mut War) { -//! state.0 += self.0 -//! } -//! fn undo(&self, state: &mut War) { -//! state.0 -= self.0 +//! fn apply(state: &mut War, tug: &Tug) -> Option { +//! Some(War(state.0 + tug.0)) //! } //! } //! 
diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 6df4585..5bacf75 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -296,7 +296,7 @@ pub(super) struct Negamaxer { #[cfg(target_arch = "wasm32")] timeout_counter: u32, pub(super) table: T, - pub(super) countermoves: CounterMoves<::M>, + pub(super) countermoves: CounterMoves, move_pool: MovePool<::M>, eval: E, @@ -306,7 +306,6 @@ pub(super) struct Negamaxer { impl::M>> Negamaxer where - ::S: Zobrist, ::M: Copy + Eq, { pub(super) fn new(table: T, eval: E, opts: IterativeOptions) -> Self { @@ -376,9 +375,9 @@ where self.eval.evaluate(s) >= beta { // If we just pass and let the opponent play this position (at reduced depth), - null_move.apply(s); - let value = -self.negamax(s, None, depth - depth_reduction, -beta, -beta + 1)?; - null_move.undo(s); + let mut nulled = AppliedMove::::new(s, null_move); + let value = + -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; // is the result still so good that we shouldn't bother with a full search? if value >= beta { return Some(value); @@ -412,9 +411,8 @@ where let mut best = WORST_EVAL; for m in moves.iter() { - m.apply(s); - let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; - m.undo(s); + let mut new = AppliedMove::::new(s, *m); + let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; best = max(best, value); alpha = max(alpha, value); if alpha >= beta { @@ -446,7 +444,7 @@ where } let alpha_orig = alpha; - let hash = s.zobrist_hash(); + let hash = E::G::zobrist_hash(s); let mut good_move = None; if let Some(value) = self.table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) { return Some(value); @@ -471,7 +469,7 @@ where // Reorder moves. 
if depth >= self.opts.min_reorder_moves_depth { - self.eval.reorder_moves(s, &mut moves); + // TODO reorder moves } self.countermoves.reorder(prev_move, &mut moves); if let Some(good) = good_move { @@ -482,19 +480,18 @@ where let mut best_move = moves[0]; let mut null_window = false; for &m in moves.iter() { - m.apply(s); + let mut new = AppliedMove::::new(s, m); let value = if null_window { - let probe = -self.negamax(s, Some(m), depth - 1, -alpha - 1, -alpha)?; + let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { // Full search fallback. - -self.negamax(s, Some(m), depth - 1, -beta, -probe)? + -self.negamax(&mut new, Some(m), depth - 1, -beta, -probe)? } else { probe } } else { - -self.negamax(s, Some(m), depth - 1, -beta, -alpha)? + -self.negamax(&mut new, Some(m), depth - 1, -beta, -alpha)? }; - m.undo(s); if value > best { best = value; best_move = m; @@ -537,15 +534,14 @@ where let mut alpha = WORST_EVAL; let beta = BEST_EVAL; for value_move in moves.iter_mut() { - value_move.m.apply(s); - let value = -self.negamax(s, Some(value_move.m), depth - 1, -beta, -alpha)?; - value_move.m.undo(s); + let mut new = AppliedMove::::new(s, value_move.m); + let value = -self.negamax(&mut new, Some(value_move.m), depth - 1, -beta, -alpha)?; alpha = max(alpha, value); value_move.value = value; } moves.sort_by_key(|vm| -vm.value); - self.table.update(s.zobrist_hash(), alpha, beta, depth, moves[0].value, moves[0].m); + self.table.update(E::G::zobrist_hash(s), alpha, beta, depth, moves[0].value, moves[0].m); Some(moves[0].value) } } @@ -570,7 +566,7 @@ pub struct IterativeSearch { impl IterativeSearch where ::M: Copy + Eq, - ::S: Clone + Zobrist, + ::S: Clone, { pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); @@ -634,7 +630,7 @@ where impl Strategy for IterativeSearch where - ::S: Clone + Zobrist, + ::S: Clone, ::M: Copy + Eq, { 
fn choose_move(&mut self, s: &::S) -> Option<::M> { @@ -651,7 +647,7 @@ where // Start timer if configured. self.negamaxer.reset_timeout(self.max_time); - let root_hash = s.zobrist_hash(); + let root_hash = E::G::zobrist_hash(s); let mut s_clone = s.clone(); let mut best_move = None; let mut interval_start; @@ -723,7 +719,7 @@ where self.negamaxer.stats.nodes_explored = 0; self.prev_value = entry.value; depth += self.opts.step_increment; - self.negamaxer.table.populate_pv(&mut self.pv, &mut s_clone); + self.negamaxer.table.populate_pv::(&mut self.pv, &s_clone); if unclamp_value(entry.value).abs() == BEST_EVAL { break; } diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index bbb264f..c00125b 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,4 +1,5 @@ use super::super::interface::*; +use super::super::util::AppliedMove; use super::sync_util::*; use rand::seq::SliceRandom; @@ -202,7 +203,9 @@ impl MonteCarloTreeSearch { moves.clear(); G::generate_moves(&state, &mut moves); let m = moves.choose(&mut rng).unwrap(); - m.apply(&mut state); + if let Some(new_state) = G::apply(&mut state, m) { + state = new_state; + } sign = -sign; depth -= 1; } @@ -246,9 +249,8 @@ impl MonteCarloTreeSearch { // Recurse. let next = node.best_child(1.).unwrap(); let m = next.m.as_ref().unwrap(); - m.apply(state); - let result = -self.simulate(next, state, force_rollout)?; - m.undo(state); + let mut new = AppliedMove::::new(state, *m); + let result = -self.simulate(next, &mut new.get().clone(), force_rollout)?; // Backpropagate. 
node.update_stats(result) diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 0b9bb71..f030122 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -49,9 +49,8 @@ impl Negamax { E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for m in moves.iter() { - m.apply(s); - let value = -self.negamax(s, depth - 1, -beta, -alpha); - m.undo(s); + let mut new = AppliedMove::::new(s, *m); + let value = -self.negamax(&mut new, depth - 1, -beta, -alpha); best = max(best, value); alpha = max(alpha, value); if alpha >= beta { @@ -86,9 +85,8 @@ where let mut s_clone = s.clone(); for &m in moves.iter() { // determine value for this move - m.apply(&mut s_clone); - let value = -self.negamax(&mut s_clone, self.max_depth - 1, WORST_EVAL, -best); - m.undo(&mut s_clone); + let mut new = AppliedMove::::new(&mut s_clone, m); + let value = -self.negamax(&mut new, self.max_depth - 1, WORST_EVAL, -best); // Strictly better than any move found so far. if value > best { best = value; diff --git a/src/strategies/table.rs b/src/strategies/table.rs index aff6d96..46cd485 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -107,14 +107,14 @@ pub(super) trait Table { // After finishing a search, populate the principal variation as deep as // the table remembers it. - fn populate_pv(&self, pv: &mut Vec, s: &mut G::S) + fn populate_pv>(&self, pv: &mut Vec, state: &G::S) where - M: Move, - ::S: Zobrist, + G::S: Clone, { pv.clear(); let mut hash_history = Vec::new(); - let mut hash = s.zobrist_hash(); + let mut state = state.clone(); + let mut hash = G::zobrist_hash(&state); while let Some(entry) = self.lookup(hash) { // The principal variation should only have exact nodes, as other // node types are from cutoffs where the node is proven to be @@ -126,18 +126,16 @@ pub(super) trait Table { // equivalent upper and lower bounds. 
let m = entry.best_move.unwrap(); pv.push(m); - m.apply(s); - hash = s.zobrist_hash(); + if let Some(new_state) = G::apply(&mut state, &m) { + state = new_state; + } + hash = G::zobrist_hash(&state); // Prevent cyclical PVs from being infinitely long. if hash_history.contains(&hash) { break; } hash_history.push(hash); } - // Restore state. - for m in pv.iter().rev() { - m.undo(s); - } } } @@ -381,16 +379,19 @@ impl ConcurrentTable for LockfreeTable { } // A single-threaded utility to find moves that have done well in other branches. -pub(super) struct CounterMoves { +pub(super) struct CounterMoves { countermove_enabled: bool, history_enabled: bool, // For a given move index, which followup most recently led to a beta cutoff? - countermove_table: Vec, + countermove_table: Vec, // For each move index, how many beta cutoffs has it produced? history_table: Vec, } -impl CounterMoves { +impl CounterMoves +where + G::M: Eq + Copy, +{ pub(super) fn new(countermove_enabled: bool, history_enabled: bool) -> Self { Self { countermove_enabled, @@ -400,38 +401,38 @@ impl CounterMoves { } } - pub(super) fn reorder(&self, prev: Option, moves: &mut [M]) { + pub(super) fn reorder(&self, prev: Option, moves: &mut [G::M]) { if !self.history_table.is_empty() { // Stable sort to preserve previous orderings. 
- moves.sort_by_key(|m| !self.history_table[m.table_index() as usize]); + moves.sort_by_key(|m| !self.history_table[G::table_index(m) as usize]); } if let Some(prev) = prev { - if let Some(response) = self.countermove_table.get(prev.table_index() as usize) { + if let Some(response) = self.countermove_table.get(G::table_index(&prev) as usize) { move_to_front(*response, moves); } } } - pub(super) fn update(&mut self, prev: Option, m: M) { + pub(super) fn update(&mut self, prev: Option, m: G::M) { if let Some(prev) = prev { - if let Some(entry) = self.countermove_table.get_mut(prev.table_index() as usize) { + if let Some(entry) = self.countermove_table.get_mut(G::table_index(&prev) as usize) { *entry = m; } } - if let Some(entry) = self.history_table.get_mut(m.table_index() as usize) { + if let Some(entry) = self.history_table.get_mut(G::table_index(&m) as usize) { *entry = 1u32.saturating_add(*entry); } } - pub(super) fn advance_generation(&mut self, null_move: Option) { + pub(super) fn advance_generation(&mut self, null_move: Option) { // Lazily allocate tables if self.countermove_enabled && self.countermove_table.is_empty() { if let Some(m) = null_move { - self.countermove_table = vec![m; M::max_table_index() as usize + 1]; + self.countermove_table = vec![m; G::max_table_index() as usize + 1]; } } if self.history_enabled && self.history_table.is_empty() { - self.history_table = vec![0; M::max_table_index() as usize + 1]; + self.history_table = vec![0; G::max_table_index() as usize + 1]; } // Partially degrade old values, to bias towards new data. diff --git a/src/strategies/util.rs b/src/strategies/util.rs index f9df67a..a425c54 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -1,4 +1,5 @@ use super::super::interface::*; +use super::super::util::AppliedMove; // For values near winning and losing values, push them slightly closer to zero. // A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). 
@@ -25,39 +26,32 @@ pub(super) fn unclamp_value(value: Evaluation) -> Evaluation { } // Return a unique id for humans for this move. -pub(super) fn move_id(s: &mut ::S, m: Option<::M>) -> String -where - ::S: Zobrist, -{ +pub(super) fn move_id(s: &mut ::S, m: Option<::M>) -> String { if let Some(mov) = m { - if let Some(notation) = mov.notation(s) { - notation - } else { - mov.apply(s); - let id = format!("{:06x}", s.zobrist_hash() & 0xffffff); - mov.undo(s); - id - } + G::notation(s, &mov).unwrap_or_else(|| { + let new = AppliedMove::::new(s, mov); + format!("{:06x}", G::zobrist_hash(&new) & 0xffffff) + }) } else { "none".to_string() } } -pub(super) fn pv_string(path: &[::M], s: &mut ::S) -> String +pub(super) fn pv_string(path: &[::M], state: &::S) -> String where - ::S: Zobrist, ::M: Copy, + ::S: Clone, { + let mut state = state.clone(); let mut out = String::new(); for (i, m) in (0..).zip(path.iter()) { if i > 0 { out.push_str("; "); } - out.push_str(move_id::(s, Some(*m)).as_str()); - m.apply(s); - } - for m in path.iter().rev() { - m.undo(s); + out.push_str(move_id::(&mut state, Some(*m)).as_str()); + if let Some(new_state) = G::apply(&mut state, m) { + state = new_state; + } } out } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 6613d2d..7301725 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -72,13 +72,13 @@ struct ParallelNegamaxer { timeout: Arc, stats: ThreadLocal>, move_pool: ThreadLocal::M>>, - countermoves: ThreadLocal::M>>, + countermoves: ThreadLocal>, pv: Mutex::M>>, } impl ParallelNegamaxer where - ::S: Clone + Zobrist + Send + Sync, + ::S: Clone + Send + Sync, ::M: Copy + Eq + Send + Sync, E: Clone + Sync + Send + 'static, { @@ -119,9 +119,9 @@ where self.eval.evaluate(s) >= beta { // If we just pass and let the opponent play this position (at reduced depth), - null_move.apply(s); - let value = -self.negamax(s, None, depth - depth_reduction, -beta, -beta + 1)?; - null_move.undo(s); + let mut nulled = 
AppliedMove::::new(s, null_move); + let value = + -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; // is the result still so good that we shouldn't bother with a full search? if value >= beta { return Some(value); @@ -155,10 +155,9 @@ where } let mut best = WORST_EVAL; - for m in moves.iter() { - m.apply(s); - let value = -self.noisy_negamax(s, depth - 1, -beta, -alpha)?; - m.undo(s); + for &m in moves.iter() { + let mut new = AppliedMove::::new(s, m); + let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; best = max(best, value); alpha = max(alpha, value); if alpha >= beta { @@ -175,7 +174,7 @@ where mut alpha: Evaluation, mut beta: Evaluation, ) -> Option where - ::S: Clone + Zobrist + Send + Sync, + ::S: Clone + Send + Sync, ::M: Copy + Eq + Send + Sync, E: Sync, { @@ -195,7 +194,7 @@ where } let alpha_orig = alpha; - let hash = s.zobrist_hash(); + let hash = E::G::zobrist_hash(s); let mut good_move = None; if let Some(value) = self.table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) { return Some(value); @@ -216,7 +215,7 @@ where // Reorder moves. if depth >= self.opts.min_reorder_moves_depth { - self.eval.reorder_moves(s, &mut moves); + // TODO: reorder moves } self.countermoves.local_do(|cm| cm.reorder(prev_move, &mut moves)); if let Some(good) = good_move { @@ -226,9 +225,10 @@ where let first_move = moves[0]; // Evaluate first move serially. - first_move.apply(s); - let initial_value = -self.negamax(s, Some(first_move), depth - 1, -beta, -alpha)?; - first_move.undo(s); + let initial_value = { + let mut new = AppliedMove::::new(s, first_move); + -self.negamax(&mut new, Some(first_move), depth - 1, -beta, -alpha)? 
+ }; alpha = max(alpha, initial_value); let (best, best_move) = if alpha >= beta { // Skip search @@ -239,19 +239,18 @@ where let mut best_move = first_move; let mut null_window = false; for &m in moves[1..].iter() { - m.apply(s); + let mut new = AppliedMove::::new(s, m); let value = if null_window { - let probe = -self.negamax(s, Some(m), depth - 1, -alpha - 1, -alpha)?; + let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { // Full search fallback. - -self.negamax(s, Some(m), depth - 1, -beta, -probe)? + -self.negamax(&mut new, Some(m), depth - 1, -beta, -probe)? } else { probe } } else { - -self.negamax(s, Some(m), depth - 1, -beta, -alpha)? + -self.negamax(&mut new, Some(m), depth - 1, -beta, -alpha)? }; - m.undo(s); if value > best { best = value; best_move = m; @@ -280,11 +279,11 @@ where } let mut state = s.clone(); - m.apply(&mut state); + let mut new = AppliedMove::::new(&mut state, m); let value = if self.opts.null_window_search && initial_alpha > alpha_orig { // TODO: send reference to alpha as neg_beta to children. let probe = -self.negamax( - &mut state, + &mut new, Some(m), depth - 1, -initial_alpha - 1, @@ -296,12 +295,12 @@ where return None; } // Full search fallback. - -self.negamax(&mut state, Some(m), depth - 1, -beta, -probe)? + -self.negamax(&mut new, Some(m), depth - 1, -beta, -probe)? } else { probe } } else { - -self.negamax(&mut state, Some(m), depth - 1, -beta, -initial_alpha)? + -self.negamax(&mut new, Some(m), depth - 1, -beta, -initial_alpha)? 
}; alpha.fetch_max(value, Ordering::SeqCst); @@ -327,7 +326,7 @@ where &self, mut state: ::S, max_depth: u8, background: bool, ) -> Option<(::M, Evaluation)> { self.table.concurrent_advance_generation(); - let root_hash = state.zobrist_hash(); + let root_hash = E::G::zobrist_hash(&state); let mut best_move = None; let mut best_value = 0; let mut interval_start; @@ -372,9 +371,9 @@ where depth += self.opts.step_increment; let mut pv_moves = Vec::new(); - self.table.populate_pv(&mut pv_moves, &mut state); + self.table.populate_pv::(&mut pv_moves, &state); self.pv.lock().unwrap().clone_from(&pv_moves); - pv = pv_string::(&pv_moves[..], &mut state); + pv = pv_string::(&pv_moves[..], &state); if unclamp_value(entry.value).abs() == BEST_EVAL { break; } @@ -439,7 +438,7 @@ impl ParallelSearch { impl Strategy for ParallelSearch where - ::S: Clone + Zobrist + Send + Sync, + ::S: Clone + Send + Sync, ::M: Copy + Eq + Send + Sync, E: Clone + Sync + Send + 'static, { @@ -493,7 +492,9 @@ where &self.thread_pool, ); let mut state = s.clone(); - best_move.apply(&mut state); + if let Some(new_state) = E::G::apply(&mut state, &best_move) { + state = new_state; + } // Launch in threadpool asynchronously. 
self.thread_pool.spawn(move || { negamaxer.iterative_search(state, 99, true); diff --git a/src/util.rs b/src/util.rs index c4d5b0d..24c76e6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -4,13 +4,49 @@ extern crate rayon; use super::interface; -use super::interface::{Game, Move}; +use super::interface::Game; #[cfg(not(target_arch = "wasm32"))] use rayon::prelude::*; use std::default::Default; use std::time::Instant; +pub(crate) struct AppliedMove<'a, G: Game> { + old: &'a mut ::S, + new: Option<::S>, + m: ::M, +} + +impl<'a, G: Game> std::ops::Deref for AppliedMove<'a, G> { + type Target = ::S; + fn deref(&self) -> &::S { + self.new.as_ref().unwrap_or(self.old) + } +} + +impl<'a, G: Game> std::ops::DerefMut for AppliedMove<'a, G> { + fn deref_mut(&mut self) -> &mut ::S { + self.new.as_mut().unwrap_or(self.old) + } +} + +impl<'a, G: Game> Drop for AppliedMove<'a, G> { + fn drop(&mut self) { + ::undo(self.old, &self.m) + } +} + +impl<'a, G: Game> AppliedMove<'a, G> { + pub(crate) fn new(old: &'a mut ::S, m: ::M) -> Self { + let new = G::apply(old, &m); + AppliedMove { old, new, m } + } + + pub(crate) fn get(&mut self) -> &mut ::S { + self.new.as_mut().unwrap_or(self.old) + } +} + /// Play a complete, new game with players using the two provided strategies. /// /// Returns `None` if the game ends in a draw, or `Some(0)`, `Some(1)` if the @@ -28,7 +64,11 @@ where while G::get_winner(&state).is_none() { let strategy = &mut strategies[s]; match strategy.choose_move(&state) { - Some(m) => m.apply(&mut state), + Some(m) => { + if let Some(new_state) = G::apply(&mut state, &m) { + state = new_state; + } + } None => break, } s = 1 - s; @@ -83,10 +123,9 @@ where } else if depth <= single_thread_cutoff { // Single-thread recurse. 
let mut count = 0; - for m in moves.iter() { - m.apply(state); - count += perft_recurse::(pool, state, depth - 1, single_thread_cutoff); - m.undo(state); + for &m in moves.iter() { + let mut new = AppliedMove::::new(state, m); + count += perft_recurse::(pool, &mut new, depth - 1, single_thread_cutoff); } count } else { @@ -94,11 +133,13 @@ where moves .par_iter() .with_max_len(1) - .map(|&m| { - let mut state2 = state.clone(); + .map(|m| { + let mut state = state.clone(); let mut pool2 = MovePool::::default(); - m.apply(&mut state2); - perft_recurse::(&mut pool2, &mut state2, depth - 1, single_thread_cutoff) + if let Some(new_state) = G::apply(&mut state, m) { + state = new_state; + } + perft_recurse::(&mut pool2, &mut state, depth - 1, single_thread_cutoff) }) .sum() }; @@ -129,9 +170,8 @@ where // Single-thread recurse. let mut count = 0; for m in moves.iter() { - m.apply(state); + let mut new = AppliedMove::::new(&mut state, m); count += perft_recurse::(pool, state, depth - 1, single_thread_cutoff); - m.undo(state); } count }; diff --git a/tests/strategies.rs b/tests/strategies.rs index a3c8011..b52e407 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -42,9 +42,8 @@ impl PlainNegamax { E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for m in moves.iter() { - m.apply(s); - let value = -self.negamax(s, depth - 1); - m.undo(s); + let mut new = E::G::apply(s, m).unwrap(); + let value = -self.negamax(&mut new, depth - 1); best = max(best, value); } best @@ -62,11 +61,10 @@ where self.best_moves.clear(); let mut best_value = WORST_EVAL; - let mut s_clone = s.clone(); + let mut s = s.clone(); for &m in moves.iter() { - m.apply(&mut s_clone); - let value = -self.negamax(&mut s_clone, self.depth - 1); - m.undo(&mut s_clone); + let mut new = E::G::apply(&mut s, &m).unwrap(); + let value = -self.negamax(&mut new, self.depth - 1); if value == best_value { self.best_moves.push(m); } else if value > best_value { @@ -80,149 +78,6 @@ where } } 
-struct AppliedMove<'a, G: UniGame> { - old: &'a mut ::S, - new: Option<::S>, - m: ::M, -} - -impl<'a, G: UniGame> std::ops::Deref for AppliedMove<'a, G> { - type Target = ::S; - fn deref(&self) -> &::S { - self.new.as_ref().unwrap_or(self.old) - } -} - -impl<'a, G: UniGame> std::ops::DerefMut for AppliedMove<'a, G> { - fn deref_mut(&mut self) -> &mut ::S { - self.new.as_mut().unwrap_or(self.old) - } -} - -impl<'a, G: UniGame> Drop for AppliedMove<'a, G> { - fn drop(&mut self) { - ::undo(self.old, &self.m) - } -} - -impl<'a, G: UniGame> AppliedMove<'a, G> { - fn new(old: &'a mut ::S, m: ::M) -> Self { - let new = ::apply(old, &m); - AppliedMove { old, new, m } - } - - fn get(&mut self) -> &mut ::S { - self.new.as_mut().unwrap_or(self.old) - } -} - -pub struct UniNegamax { - depth: u8, - root_value: Evaluation, - // All moves tied with the best valuation. - best_moves: Vec<::M>, - eval: E, -} - -impl UniNegamax { - pub fn new(eval: E, depth: u8) -> UniNegamax { - UniNegamax { depth: depth, root_value: 0, best_moves: Vec::new(), eval } - } - - fn negamax(&self, s: &mut ::S, depth: u8) -> Evaluation - where - <::G as UniGame>::M: Copy, - { - if let Some(winner) = E::G::get_winner(s) { - return winner.evaluate(); - } - if depth == 0 { - return self.eval.evaluate(s); - } - let mut moves = Vec::new(); - ::G::generate_moves(s, &mut moves); - let mut best = WORST_EVAL; - for m in moves.iter() { - { - let mut new = AppliedMove::::new(s, *m); - let value = -self.negamax(new.get(), depth - 1); - best = max(best, value); - } - } - best - } -} - -impl UniStrat for UniNegamax -where - ::S: Clone, - ::M: Copy, -{ - fn choose_move(&mut self, s: &::S) -> Option<::M> { - let mut moves = Vec::new(); - E::G::generate_moves(s, &mut moves); - - self.best_moves.clear(); - let mut best_value = WORST_EVAL; - let mut s_clone = s.clone(); - for &m in moves.iter() { - let value = { - let mut new = AppliedMove::::new(&mut s_clone, m); - -self.negamax(&mut new, self.depth - 1) - }; - if value 
== best_value { - self.best_moves.push(m); - } else if value > best_value { - best_value = value; - self.best_moves.clear(); - self.best_moves.push(m); - } - } - self.root_value = best_value; - self.best_moves.first().map(|m| *m) - } -} - -#[derive(Copy, Clone)] -struct TugBoard(i8); -#[derive(Copy, Clone)] -struct TugMove(i8); -struct TugGame; - -impl minimax::UniGame for TugGame { - type S = TugBoard; - type M = TugMove; - - fn generate_moves(_b: &TugBoard, moves: &mut Vec) { - moves.push(TugMove(1)); - moves.push(TugMove(-1)); - } - - fn apply(b: &mut TugBoard, m: &TugMove) -> Option { - Some(TugBoard(b.0 + m.0)) - } - - fn get_winner(_b: &TugBoard) -> Option { - None - } -} - -#[derive(Clone)] -struct UniRandom; - -impl Default for UniRandom { - fn default() -> Self { - Self {} - } -} - -impl minimax::UniEval for UniRandom { - type G = TugGame; - fn evaluate(&self, b: &TugBoard) -> minimax::Evaluation { - b.0 as minimax::Evaluation - } -} - #[derive(Clone)] struct RandomEvaluator; @@ -252,10 +107,11 @@ fn generate_random_state(depth: u8) -> connect4::Board { let mut moves = Vec::new(); connect4::Game::generate_moves(&b, &mut moves); let m = moves.choose(&mut rng).unwrap(); - m.apply(&mut b); - if connect4::Game::get_winner(&b).is_some() { + let next = connect4::Game::apply(&mut b, &m).unwrap(); + if connect4::Game::get_winner(&next).is_some() { // Oops, undo and try again on the next iter. 
- m.undo(&mut b); + } else { + b = next; } } b @@ -264,13 +120,13 @@ fn generate_random_state(depth: u8) -> connect4::Board { #[test] fn test_winning_position() { let mut b = connect4::Board::default(); - connect4::Place { col: 2 }.apply(&mut b); - connect4::Place { col: 3 }.apply(&mut b); - connect4::Place { col: 2 }.apply(&mut b); - connect4::Place { col: 3 }.apply(&mut b); - connect4::Place { col: 2 }.apply(&mut b); - connect4::Place { col: 3 }.apply(&mut b); - connect4::Place { col: 2 }.apply(&mut b); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); assert_eq!(Some(Winner::PlayerJustMoved), connect4::Game::get_winner(&b)); // Make sure none of the strategies die when given a winning position. From 5b4f859acaea8880ea6489982e523cbc17cfe440 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 15:52:39 -0700 Subject: [PATCH 133/162] Remove unnecessary move reference in apply() and undo(). 
--- examples/chess/src/main.rs | 6 +++--- examples/connect4.rs | 4 ++-- examples/ttt.rs | 6 +++--- src/interface.rs | 10 +++++----- src/lib.rs | 2 +- src/strategies/mcts.rs | 2 +- src/strategies/table.rs | 2 +- src/strategies/util.rs | 4 ++-- src/strategies/ybw.rs | 2 +- src/util.rs | 8 ++++---- tests/strategies.rs | 20 ++++++++++---------- 11 files changed, 33 insertions(+), 33 deletions(-) diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs index e357a91..0c4a4cd 100644 --- a/examples/chess/src/main.rs +++ b/examples/chess/src/main.rs @@ -24,8 +24,8 @@ impl minimax::Game for Chess { } } - fn apply(b: &mut Board, m: &ChessMove) -> Option { - Some(b.make_move_new(*m)) + fn apply(b: &mut Board, m: ChessMove) -> Option { + Some(b.make_move_new(m)) } fn zobrist_hash(b: &Board) -> u64 { @@ -74,7 +74,7 @@ fn main() { while Chess::get_winner(&b).is_none() { println!("{}", b); match strategy.choose_move(&b) { - Some(m) => b = Chess::apply(&mut b, &m).unwrap(), + Some(m) => b = Chess::apply(&mut b, m).unwrap(), None => break, } } diff --git a/examples/connect4.rs b/examples/connect4.rs index ca43849..2d81670 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -128,7 +128,7 @@ impl minimax::Game for Game { } } - fn apply(b: &mut Board, place: &Place) -> Option { + fn apply(b: &mut Board, place: Place) -> Option { let mut b = b.clone(); let col = (b.all_pieces >> place.col_shift()) & COL_MASK; let new_piece = (col + 1) << place.col_shift(); @@ -278,7 +278,7 @@ fn main() { Some(m) => { let color = if b.reds_move() { "Red" } else { "Yellow" }; println!("{} piece in column {}", color, m.col + 1); - b = self::Game::apply(&mut b, &m).unwrap(); + b = self::Game::apply(&mut b, m).unwrap(); } None => break, } diff --git a/examples/ttt.rs b/examples/ttt.rs index 6b7cc67..eaf7e3e 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -152,12 +152,12 @@ impl minimax::Game for Game { } } - fn apply(b: &mut Board, m: &Place) -> Option { + fn apply(b: &mut 
Board, m: Place) -> Option { b.squares[m.i as usize] = b.to_move; b.to_move = b.to_move.invert(); None } - fn undo(b: &mut Board, m: &Place) { + fn undo(b: &mut Board, m: Place) { b.squares[m.i as usize] = Square::Empty; b.to_move = b.to_move.invert(); } @@ -247,7 +247,7 @@ fn main() { println!("{}", b); let ref mut strategy = strategies[s]; match strategy.choose_move(&mut b) { - Some(m) => self::Game::apply(&mut b, &m), + Some(m) => self::Game::apply(&mut b, m), None => break, }; s = 1 - s; diff --git a/src/interface.rs b/src/interface.rs index fd65548..5b3f361 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -96,11 +96,11 @@ pub trait Game: Sized { /// ``` /// struct BigBoard([u8; 4096]); /// struct BigMove(u16); - /// fn apply(state: &mut BigBoard, m: &BigMove) -> Option { + /// fn apply(state: &mut BigBoard, m: BigMove) -> Option { /// state.0[m.0 as usize] += 1; /// None /// } - /// fn undo(state: &mut BigBoard, m: &BigMove) { + /// fn undo(state: &mut BigBoard, m: BigMove) { /// state.0[m.0 as usize] -= 1; /// } /// ``` @@ -109,14 +109,14 @@ pub trait Game: Sized { /// ``` /// struct SmallBoard(u64); /// struct SmallMove(u8); - /// fn apply(state: &mut SmallBoard, m: &SmallMove) -> Option { + /// fn apply(state: &mut SmallBoard, m: SmallMove) -> Option { /// Some(SmallBoard(state.0 | (1< Option; + fn apply(state: &mut Self::S, m: Self::M) -> Option; /// Undo mutation done in apply, if any. - fn undo(_state: &mut Self::S, _m: &Self::M) {} + fn undo(_state: &mut Self::S, _m: Self::M) {} /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if diff --git a/src/lib.rs b/src/lib.rs index bf9263b..750a2e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,7 +41,7 @@ //! } //! } //! -//! fn apply(state: &mut War, tug: &Tug) -> Option { +//! fn apply(state: &mut War, tug: Tug) -> Option { //! Some(War(state.0 + tug.0)) //! } //! 
} diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index c00125b..d37d972 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -203,7 +203,7 @@ impl MonteCarloTreeSearch { moves.clear(); G::generate_moves(&state, &mut moves); let m = moves.choose(&mut rng).unwrap(); - if let Some(new_state) = G::apply(&mut state, m) { + if let Some(new_state) = G::apply(&mut state, *m) { state = new_state; } sign = -sign; diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 46cd485..6cc0916 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -126,7 +126,7 @@ pub(super) trait Table { // equivalent upper and lower bounds. let m = entry.best_move.unwrap(); pv.push(m); - if let Some(new_state) = G::apply(&mut state, &m) { + if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } hash = G::zobrist_hash(&state); diff --git a/src/strategies/util.rs b/src/strategies/util.rs index a425c54..444cae9 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -44,11 +44,11 @@ where { let mut state = state.clone(); let mut out = String::new(); - for (i, m) in (0..).zip(path.iter()) { + for (i, &m) in (0..).zip(path.iter()) { if i > 0 { out.push_str("; "); } - out.push_str(move_id::(&mut state, Some(*m)).as_str()); + out.push_str(move_id::(&mut state, Some(m)).as_str()); if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 7301725..0f91bd7 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -492,7 +492,7 @@ where &self.thread_pool, ); let mut state = s.clone(); - if let Some(new_state) = E::G::apply(&mut state, &best_move) { + if let Some(new_state) = E::G::apply(&mut state, best_move) { state = new_state; } // Launch in threadpool asynchronously. 
diff --git a/src/util.rs b/src/util.rs index 24c76e6..761e9e6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -32,13 +32,13 @@ impl<'a, G: Game> std::ops::DerefMut for AppliedMove<'a, G> { impl<'a, G: Game> Drop for AppliedMove<'a, G> { fn drop(&mut self) { - ::undo(self.old, &self.m) + ::undo(self.old, self.m) } } impl<'a, G: Game> AppliedMove<'a, G> { pub(crate) fn new(old: &'a mut ::S, m: ::M) -> Self { - let new = G::apply(old, &m); + let new = G::apply(old, m); AppliedMove { old, new, m } } @@ -65,7 +65,7 @@ where let strategy = &mut strategies[s]; match strategy.choose_move(&state) { Some(m) => { - if let Some(new_state) = G::apply(&mut state, &m) { + if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } } @@ -136,7 +136,7 @@ where .map(|m| { let mut state = state.clone(); let mut pool2 = MovePool::::default(); - if let Some(new_state) = G::apply(&mut state, m) { + if let Some(new_state) = G::apply(&mut state, *m) { state = new_state; } perft_recurse::(&mut pool2, &mut state, depth - 1, single_thread_cutoff) diff --git a/tests/strategies.rs b/tests/strategies.rs index b52e407..bc290df 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -41,7 +41,7 @@ impl PlainNegamax { let mut moves = Vec::new(); E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; - for m in moves.iter() { + for &m in moves.iter() { let mut new = E::G::apply(s, m).unwrap(); let value = -self.negamax(&mut new, depth - 1); best = max(best, value); @@ -63,7 +63,7 @@ where let mut best_value = WORST_EVAL; let mut s = s.clone(); for &m in moves.iter() { - let mut new = E::G::apply(&mut s, &m).unwrap(); + let mut new = E::G::apply(&mut s, m).unwrap(); let value = -self.negamax(&mut new, self.depth - 1); if value == best_value { self.best_moves.push(m); @@ -107,7 +107,7 @@ fn generate_random_state(depth: u8) -> connect4::Board { let mut moves = Vec::new(); connect4::Game::generate_moves(&b, &mut moves); let m = moves.choose(&mut rng).unwrap(); - let next = 
connect4::Game::apply(&mut b, &m).unwrap(); + let next = connect4::Game::apply(&mut b, *m).unwrap(); if connect4::Game::get_winner(&next).is_some() { // Oops, undo and try again on the next iter. } else { @@ -120,13 +120,13 @@ fn generate_random_state(depth: u8) -> connect4::Board { #[test] fn test_winning_position() { let mut b = connect4::Board::default(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); assert_eq!(Some(Winner::PlayerJustMoved), connect4::Game::get_winner(&b)); // Make sure none of the strategies die when given a winning position. From e436e1d42053ef7ccc8a1a14bba28a9175367051 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 20:39:12 -0700 Subject: [PATCH 134/162] Remove move referencing in other Game trait APIs. 
--- examples/chess/src/main.rs | 2 +- src/interface.rs | 4 ++-- src/strategies/table.rs | 8 ++++---- src/strategies/util.rs | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs index 0c4a4cd..09c3018 100644 --- a/examples/chess/src/main.rs +++ b/examples/chess/src/main.rs @@ -32,7 +32,7 @@ impl minimax::Game for Chess { b.get_hash() } - fn notation(_b: &Board, m: &ChessMove) -> Option { + fn notation(_b: &Board, m: ChessMove) -> Option { Some(format!("{}", m)) } } diff --git a/src/interface.rs b/src/interface.rs index 5b3f361..73b9894 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -138,11 +138,11 @@ pub trait Game: Sized { } /// Return a human-readable notation for this move in this game state. - fn notation(_state: &Self::S, _move: &Self::M) -> Option { + fn notation(_state: &Self::S, _move: Self::M) -> Option { None } /// Return a small index for this move for position-independent tables. - fn table_index(_: &Self::M) -> u16 { + fn table_index(_: Self::M) -> u16 { 0 } /// Maximum index value. diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 6cc0916..abc6765 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -404,10 +404,10 @@ where pub(super) fn reorder(&self, prev: Option, moves: &mut [G::M]) { if !self.history_table.is_empty() { // Stable sort to preserve previous orderings. 
- moves.sort_by_key(|m| !self.history_table[G::table_index(m) as usize]); + moves.sort_by_key(|&m| !self.history_table[G::table_index(m) as usize]); } if let Some(prev) = prev { - if let Some(response) = self.countermove_table.get(G::table_index(&prev) as usize) { + if let Some(response) = self.countermove_table.get(G::table_index(prev) as usize) { move_to_front(*response, moves); } } @@ -415,11 +415,11 @@ where pub(super) fn update(&mut self, prev: Option, m: G::M) { if let Some(prev) = prev { - if let Some(entry) = self.countermove_table.get_mut(G::table_index(&prev) as usize) { + if let Some(entry) = self.countermove_table.get_mut(G::table_index(prev) as usize) { *entry = m; } } - if let Some(entry) = self.history_table.get_mut(G::table_index(&m) as usize) { + if let Some(entry) = self.history_table.get_mut(G::table_index(m) as usize) { *entry = 1u32.saturating_add(*entry); } } diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 444cae9..0f659a0 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -28,7 +28,7 @@ pub(super) fn unclamp_value(value: Evaluation) -> Evaluation { // Return a unique id for humans for this move. pub(super) fn move_id(s: &mut ::S, m: Option<::M>) -> String { if let Some(mov) = m { - G::notation(s, &mov).unwrap_or_else(|| { + G::notation(s, mov).unwrap_or_else(|| { let new = AppliedMove::::new(s, mov); format!("{:06x}", G::zobrist_hash(&new) & 0xffffff) }) From 7510412ac9676ad13140b9328b6639da64bcc88a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 21:04:49 -0700 Subject: [PATCH 135/162] Release 0.5.0 New unified Game trait * Unifies functions from Game, Move, Zobrist traits. * Unifies game state semantics for mutate-in-place and copy-on-play games. * No useful types need to implement minimax traits, so you can easily wrap games from other crates.
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2b3e03f..019c855 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.4.0" +version = "0.5.0" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" From a1acf1265b7c3d63be8df6a1df1ec0535f179792 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 21:34:49 -0700 Subject: [PATCH 136/162] Fix wasm build. --- src/strategies/mcts.rs | 2 +- src/util.rs | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index d37d972..895d47b 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -250,7 +250,7 @@ impl MonteCarloTreeSearch { let next = node.best_child(1.).unwrap(); let m = next.m.as_ref().unwrap(); let mut new = AppliedMove::::new(state, *m); - let result = -self.simulate(next, &mut new.get().clone(), force_rollout)?; + let result = -self.simulate(next, &mut new, force_rollout)?; // Backpropagate. node.update_stats(result) diff --git a/src/util.rs b/src/util.rs index 761e9e6..37011e2 100644 --- a/src/util.rs +++ b/src/util.rs @@ -41,10 +41,6 @@ impl<'a, G: Game> AppliedMove<'a, G> { let new = G::apply(old, m); AppliedMove { old, new, m } } - - pub(crate) fn get(&mut self) -> &mut ::S { - self.new.as_mut().unwrap_or(self.old) - } } /// Play a complete, new game with players using the two provided strategies. @@ -169,9 +165,9 @@ where } else { // Single-thread recurse. 
let mut count = 0; - for m in moves.iter() { - let mut new = AppliedMove::::new(&mut state, m); - count += perft_recurse::(pool, state, depth - 1, single_thread_cutoff); + for &m in moves.iter() { + let mut new = AppliedMove::::new(state, m); + count += perft_recurse::(pool, &mut new, depth - 1, single_thread_cutoff); } count }; From 28bee1c9d3bc6bd5da41574ea00942c105472730 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 21 Mar 2023 21:36:12 -0700 Subject: [PATCH 137/162] Release 0.5.1 * Fix wasm build. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 019c855..31f5287 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.5.0" +version = "0.5.1" authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" From 7fb389fbee78d09c351896d4f61b35cfcbebb9ac Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 22 Mar 2023 07:15:07 -0700 Subject: [PATCH 138/162] Add mancala example. Much easier to implement without undo. --- examples/mancala.rs | 186 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 examples/mancala.rs diff --git a/examples/mancala.rs b/examples/mancala.rs new file mode 100644 index 0000000..5407810 --- /dev/null +++ b/examples/mancala.rs @@ -0,0 +1,186 @@ +extern crate minimax; + +use crate::minimax::{Game, Strategy}; +use std::fmt; + +#[derive(Copy, Clone)] +struct Board { + // First index by player. + // Next index by pit, counting down from 6 to 1 for the pits in play. + // Pit zero is that player's store. + // If I wanted to be crazy bit twiddly I could put these in a pair of u64s and shift stuff around. + pits: [[u8; 7]; 2], + skipped: bool, + // u1 of pits player index. 
+ to_move: bool, +} + +impl Default for Board { + fn default() -> Board { + Board { pits: [[0, 4, 4, 4, 4, 4, 4]; 2], skipped: false, to_move: false } + } +} + +// 1-6 means play from that pit. +// 0 means pass (because of being skipped). +type Move = u8; + +struct Mancala; + +impl minimax::Game for Mancala { + type S = Board; + type M = Move; + + fn generate_moves(board: &Board, moves: &mut Vec) { + if board.skipped { + moves.push(0); + return; + } + for i in 1..7 { + if board.pits[board.to_move as usize][i] > 0 { + moves.push(i as Move); + } + } + } + + fn apply(board: &mut Board, m: Move) -> Option { + let mut board = board.clone(); + if board.skipped { + board.skipped = false; + board.to_move = !board.to_move; + return Some(board); + } + + // Grab the stones. + let mut player = board.to_move as usize; + let mut i = m as usize; + let mut stones = board.pits[player][i]; + board.pits[player][i] = 0; + // At the beginning of each iteration, it points at the previous pit. + while stones > 0 { + if player == board.to_move as usize && i == 0 { + i = 6; + player ^= 1; + } else if player != board.to_move as usize && i == 1 { + i = 6; + player ^= 1; + } else { + i -= 1; + } + board.pits[player][i] += 1; + stones -= 1; + } + + if player == board.to_move as usize { + if i == 0 { + // End condition: ends in own bowl + board.skipped = true; + } else if board.pits[player][i] == 1 { + // End condition: ends on own side in empty pit + let captured = board.pits[player][i] + board.pits[player ^ 1][7 - i]; + board.pits[player][i] = 0; + board.pits[player ^ 1][7 - i] = 0; + board.pits[player][0] += captured; + } + } + + board.to_move = !board.to_move; + Some(board) + } + + fn get_winner(board: &Board) -> Option { + if board.pits[0][1..].iter().sum::() == 0 || board.pits[1][1..].iter().sum::() == 0 + { + let to_move_total = board.pits[board.to_move as usize].iter().sum::(); + Some(if to_move_total == 24 { + minimax::Winner::Draw + } else if to_move_total > 24 { + 
minimax::Winner::PlayerToMove + } else { + minimax::Winner::PlayerJustMoved + }) + } else { + None + } + } + + fn zobrist_hash(board: &Board) -> u64 { + let mut hash = board.to_move as u64; + for i in 0..7 { + hash ^= HASHES[i].wrapping_mul(board.pits[0][i] as u64); + hash ^= HASHES[i + 7].wrapping_mul(board.pits[1][i] as u64); + } + hash + } + + fn null_move(_: &Board) -> Option { + Some(0) + } + + fn notation(_: &Board, m: Move) -> Option { + Some(if m == 0 { "skipped".to_owned() } else { format!("pit {}", m) }) + } + + fn table_index(m: Move) -> u16 { + m as u16 + } + fn max_table_index() -> u16 { + 6 + } +} + +impl fmt::Display for Board { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "+-----------------------+\n| |")?; + for pit in &self.pits[1][1..] { + write!(f, "{:>2}|", pit)?; + } + write!(f, " |\n+{:>2}+--+--+--+--+--+--+{:>2}+\n| ", self.pits[1][0], self.pits[0][0])?; + for pit in self.pits[0][1..].iter().rev() { + write!(f, "|{:>2}", pit)?; + } + write!(f, "| |\n+-----------------------+\n") + } +} + +#[derive(Default)] +struct Evaluator; + +impl minimax::Evaluator for Evaluator { + type G = Mancala; + fn evaluate(&self, board: &Board) -> minimax::Evaluation { + board.pits[board.to_move as usize].iter().sum::() as minimax::Evaluation - 24 + } +} + +fn main() { + let mut board = Board::default(); + let opts = minimax::IterativeOptions::new().verbose(); + let mut strategy = minimax::IterativeSearch::new(Evaluator::default(), opts); + strategy.set_timeout(std::time::Duration::from_secs(1)); + while Mancala::get_winner(&board).is_none() { + println!("{}", board); + match strategy.choose_move(&board) { + Some(m) => board = Mancala::apply(&mut board, m).unwrap(), + None => break, + } + } + println!("Winner player {:?}", board.to_move as u8 + 1); +} + +const HASHES: [u64; 14] = [ + 0x73399349585d196e, + 0xe512dc15f0da3dd1, + 0x4fbc1b81c6197db2, + 0x16b5034810111a66, + 0xa9a9d0183e33c311, + 0xbb9d7bdea0dad2d6, + 0x089d9205c11ca5c7, + 
0x18d9db91aa689617, + 0x1336123120681e34, + 0xc902e6c0bd6ef6bf, + 0x16985ba0916238c1, + 0x6144c3f2ab9f6dc4, + 0xf24b4842de919a02, + 0xdd6dd35ba0c150a1, +]; From fb410c56b85d02e14444b3de8c4f9622404b8305 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 22 Mar 2023 11:37:04 -0700 Subject: [PATCH 139/162] Remove Move from docs --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 750a2e4..9c7e903 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,8 @@ //! The `minimax` library provides interfaces for defining two-player //! perfect-knowledge games, and strategies for choosing moves. //! -//! Any game can be defined by implementing 2 traits: Game and Move. +//! Any game can be defined by implementing the Game trait, in terms of a game +//! state type and a move type. //! ``` //! use minimax::Strategy; //! From 9faa5ccaf7a0ec0f5ccd1264f4fa5e1c9cfb44a3 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 8 May 2023 17:00:05 -0700 Subject: [PATCH 140/162] Exponential algorithm for interpretting depth in mcts --- src/strategies/mcts.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 895d47b..105ed5f 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -319,6 +319,8 @@ where fn set_max_depth(&mut self, depth: u8) { // Set some arbitrary function of rollouts. self.max_time = Duration::default(); - self.max_rollouts = depth as u32 * 100; + self.max_rollouts = 5u32 + .saturating_pow(depth as u32) + .saturating_mul(self.options.rollouts_before_expanding + 1); } } From 45b1d33e471485df489643c9e44fd2df5602c2a9 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 12 May 2023 14:45:07 -0700 Subject: [PATCH 141/162] Update rustfmt edition to avoid deprecation warning. 
--- .rustfmt.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.rustfmt.toml b/.rustfmt.toml index a835e4e..9d7ffed 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,3 +1,3 @@ -edition = "2018" -fn_args_layout = "Compressed" +edition = "2021" +fn_params_layout = "Compressed" use_small_heuristics = "Max" From 46a27d1959445a891cc56f661cf266203180617c Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 12 May 2023 15:35:53 -0700 Subject: [PATCH 142/162] mcts: add verbose mode It verifies that unweighted rollouts are terrible. --- src/strategies/mcts.rs | 67 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 105ed5f..39b8f21 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,14 +1,16 @@ use super::super::interface::*; use super::super::util::AppliedMove; use super::sync_util::*; +use super::util::move_id; use rand::seq::SliceRandom; use rand::Rng; use std::marker::PhantomData; -use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32, Ordering}; +use std::sync::atomic::Ordering::{Relaxed, SeqCst}; +use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32}; use std::sync::Arc; use std::thread::spawn; -use std::time::Duration; +use std::time::{Duration, Instant}; struct Node { // The Move to get from the parent to here. @@ -52,7 +54,7 @@ impl Node { // Choose best child based on UCT. fn best_child(&self, exploration_score: f32) -> Option<&Node> { - let mut log_visits = (self.visits.load(Ordering::SeqCst) as f32).log2(); + let mut log_visits = (self.visits.load(SeqCst) as f32).log2(); // Keep this numerator non-negative. 
if log_visits < 0.0 { log_visits = 0.0; @@ -78,8 +80,8 @@ impl Node { } fn uct_score(&self, exploration_score: f32, log_parent_visits: f32) -> f32 { - let visits = self.visits.load(Ordering::Relaxed) as f32; - let score = self.score.load(Ordering::Relaxed) as f32; + let visits = self.visits.load(Relaxed) as f32; + let score = self.score.load(Relaxed) as f32; if visits == 0.0 { // Avoid NaNs. return if exploration_score > 0.0 { f32::INFINITY } else { 0.0 }; @@ -89,8 +91,8 @@ impl Node { } fn update_stats(&self, result: i32) -> Option { - self.visits.fetch_add(1, Ordering::SeqCst); - self.score.fetch_add(result, Ordering::SeqCst); + self.visits.fetch_add(1, SeqCst); + self.score.fetch_add(result, SeqCst); // Always return Some, as we aren't timed out. Some(result) } @@ -99,6 +101,7 @@ impl Node { /// Options for MonteCarloTreeSearch. #[derive(Clone)] pub struct MCTSOptions { + pub verbose: bool, max_rollout_depth: u32, rollouts_before_expanding: u32, // None means use num_cpus. @@ -108,11 +111,22 @@ pub struct MCTSOptions { impl Default for MCTSOptions { fn default() -> Self { - Self { max_rollout_depth: 100, rollouts_before_expanding: 0, num_threads: None } + Self { + verbose: false, + max_rollout_depth: 100, + rollouts_before_expanding: 0, + num_threads: None, + } } } impl MCTSOptions { + /// Enable verbose print statements after each search. + pub fn verbose(mut self) -> Self { + self.verbose = true; + self + } + /// Set a maximum depth for rollouts. Rollouts that reach this depth are /// stopped and assigned a Draw value. pub fn with_max_rollout_depth(mut self, depth: u32) -> Self { @@ -216,7 +230,7 @@ impl MonteCarloTreeSearch { where G::S: Clone, { - if self.timeout.load(Ordering::Relaxed) { + if self.timeout.load(Relaxed) { return None; } if force_rollout { @@ -227,7 +241,7 @@ impl MonteCarloTreeSearch { Some(expansion) => expansion, None => { // This is a leaf node. 
- if node.visits.load(Ordering::SeqCst) < self.options.rollouts_before_expanding { + if node.visits.load(SeqCst) < self.options.rollouts_before_expanding { // Just rollout from here. return node.update_stats(self.rollout(state)); } else { @@ -264,6 +278,7 @@ where G::M: Copy + Send + Sync + 'static, { fn choose_move(&mut self, s: &G::S) -> Option { + let start_time = Instant::now(); let root = Arc::new(Node::::new(None)); root.expansion.try_set(new_expansion::(s)); @@ -307,6 +322,38 @@ where thread.join().unwrap(); } + if self.options.verbose { + let total_visits = root.visits.load(Relaxed); + let duration = Instant::now().duration_since(start_time); + let rate = total_visits as f64 / num_threads as f64 / duration.as_secs_f64(); + eprintln!( + "Using {} threads, did {} total simulations with {:.1} rollouts/sec/core", + num_threads, total_visits, rate + ); + // Sort moves by visit count, largest first. + let mut children = root + .expansion + .get()? + .children + .iter() + .map(|node| (node.visits.load(Relaxed), node.score.load(Relaxed), node.m)) + .collect::>(); + children.sort_by_key(|t| !t.0); + + // Dump stats about the top 10 nodes. + let mut state = s.clone(); + for (visits, score, m) in children.into_iter().take(10) { + // Normalized so all wins is 100%, all draws is 50%, and all losses is 0%. + let win_rate = (score as f64 + visits as f64) / (visits as f64 * 2.0); + eprintln!( + "{:>6} visits, {:.02}% wins: {}", + visits, + win_rate * 100.0, + move_id::(&mut state, m) + ); + } + } + let exploration = 0.0; // Just get best node. 
root.best_child(exploration).map(|node| node.m.unwrap()) } From fed42720a3311eecd6406f4035363f830a623255 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 May 2023 09:50:16 -0700 Subject: [PATCH 143/162] mcts: use scoped threads instead of cloning everything --- src/strategies/mcts.rs | 54 ++++++++++++------------------------------ 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 39b8f21..6eb4d7b 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -9,7 +9,7 @@ use std::marker::PhantomData; use std::sync::atomic::Ordering::{Relaxed, SeqCst}; use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32}; use std::sync::Arc; -use std::thread::spawn; +use std::thread; use std::time::{Duration, Instant}; struct Node { @@ -160,19 +160,6 @@ pub struct MonteCarloTreeSearch { game_type: PhantomData, } -// derive is broken with PhantomData (https://github.com/rust-lang/rust/issues/26925) -impl Clone for MonteCarloTreeSearch { - fn clone(&self) -> Self { - Self { - options: self.options.clone(), - max_rollouts: self.max_rollouts, - max_time: self.max_time, - timeout: self.timeout.clone(), - game_type: PhantomData, - } - } -} - impl MonteCarloTreeSearch { pub fn new(options: MCTSOptions) -> Self { Self { @@ -273,13 +260,13 @@ impl MonteCarloTreeSearch { impl Strategy for MonteCarloTreeSearch where - G: Send + 'static, - G::S: Clone + Send + 'static, - G::M: Copy + Send + Sync + 'static, + G: Sync, + G::S: Clone + Send, + G::M: Copy + Sync, { fn choose_move(&mut self, s: &G::S) -> Option { let start_time = Instant::now(); - let root = Arc::new(Node::::new(None)); + let root = Box::new(Node::::new(None)); root.expansion.try_set(new_expansion::(s)); let num_threads = self.options.num_threads.unwrap_or_else(num_cpus::get) as u32; @@ -295,32 +282,21 @@ where timeout_signal(self.max_time) }; - let threads = (1..num_threads) - .map(|_| { - let node = root.clone(); + thread::scope(|scope| { + 
for i in 0..num_threads { + let node = &*root; + let mtcs = &*self; let mut state = s.clone(); - let mcts = self.clone(); - spawn(move || { - for _ in 0..rollouts_per_thread { - if mcts.simulate(&node, &mut state, false).is_none() { + scope.spawn(move || { + let rollouts = rollouts_per_thread + (i < extra) as u32; + for _ in 0..rollouts { + if mtcs.simulate(node, &mut state, false).is_none() { break; } } - }) - }) - .collect::>(); - - let mut state = s.clone(); - for _ in 0..rollouts_per_thread + extra { - if self.simulate(&root, &mut state, false).is_none() { - break; + }); } - } - - // Wait for threads. - for thread in threads { - thread.join().unwrap(); - } + }); if self.options.verbose { let total_visits = root.visits.load(Relaxed); From e451cebb8aa482463b250e37aeb59def49ccf5fb Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 May 2023 11:22:46 -0700 Subject: [PATCH 144/162] mcts: add custom rollout policy --- src/lib.rs | 2 +- src/strategies/mcts.rs | 119 ++++++++++++++++++++++++++++++----------- 2 files changed, 88 insertions(+), 33 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9c7e903..bc83adf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,7 +73,7 @@ pub mod util; pub use interface::*; pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; #[cfg(not(target_arch = "wasm32"))] -pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch}; +pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch, RolloutPolicy}; pub use strategies::negamax::Negamax; pub use strategies::random::Random; #[cfg(not(target_arch = "wasm32"))] diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 6eb4d7b..67f1c24 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -3,6 +3,7 @@ use super::super::util::AppliedMove; use super::sync_util::*; use super::util::move_id; +use rand::rngs::ThreadRng; use rand::seq::SliceRandom; use rand::Rng; use std::marker::PhantomData; @@ -106,7 +107,6 @@ pub struct 
MCTSOptions { rollouts_before_expanding: u32, // None means use num_cpus. num_threads: Option, - // TODO: rollout_policy } impl Default for MCTSOptions { @@ -149,14 +149,76 @@ impl MCTSOptions { } } +/// Advanced random rollout policy for Monte Carlo Tree Search. +pub trait RolloutPolicy { + /// The type of game that can be evaluated. + type G: Game; + + /// Custom function to choose random move during rollouts. + /// Implementations can bias towards certain moves, ensure winning moves, etc. + /// The provided move vec is for scratch space. + fn random_move( + &self, state: &mut ::S, move_scratch: &mut Vec<::M>, + rng: &mut ThreadRng, + ) -> ::M; + + /// Implementation of a rollout over many random moves. Not needed to be overridden. + fn rollout(&self, options: &MCTSOptions, state: &::S) -> i32 + where + ::S: Clone, + { + let mut rng = rand::thread_rng(); + let mut depth = options.max_rollout_depth; + let mut state = state.clone(); + let mut moves = Vec::new(); + let mut sign = 1; + loop { + if let Some(winner) = Self::G::get_winner(&state) { + return match winner { + Winner::PlayerJustMoved => 1, + Winner::PlayerToMove => -1, + Winner::Draw => 0, + } * sign; + } + + if depth == 0 { + return 0; + } + + moves.clear(); + let m = self.random_move(&mut state, &mut moves, &mut rng); + if let Some(new_state) = Self::G::apply(&mut state, m) { + state = new_state; + } + sign = -sign; + depth -= 1; + } + } +} + +struct DumbRolloutPolicy { + game_type: PhantomData, +} + +impl RolloutPolicy for DumbRolloutPolicy { + type G = G; + fn random_move( + &self, state: &mut ::S, moves: &mut Vec<::M>, + rng: &mut ThreadRng, + ) -> ::M { + G::generate_moves(state, moves); + *moves.choose(rng).unwrap() + } +} + /// A strategy that uses random playouts to explore the game tree to decide on the best move. /// This can be used without an Evaluator, just using the rules of the game. 
pub struct MonteCarloTreeSearch { - // TODO: Evaluator options: MCTSOptions, max_rollouts: u32, max_time: Duration, timeout: Arc, + rollout_policy: Option + Sync>>, game_type: PhantomData, } @@ -167,6 +229,23 @@ impl MonteCarloTreeSearch { max_rollouts: 0, max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), + rollout_policy: None, + game_type: PhantomData, + } + } + + /// Create a searcher with a custom rollout policy. You could bias the + /// random move generation to prefer certain kinds of moves, always choose + /// winning moves, etc. + pub fn new_with_policy( + options: MCTSOptions, policy: Box + Sync>, + ) -> Self { + Self { + options, + max_rollouts: 0, + max_time: Duration::from_secs(5), + timeout: Arc::new(AtomicBool::new(false)), + rollout_policy: Some(policy), game_type: PhantomData, } } @@ -177,44 +256,20 @@ impl MonteCarloTreeSearch { self.max_rollouts = rollouts; } - // Returns score for this node. +1 for win of original player to move. - // TODO: policy options: random, look 1 ahead for winning moves, BYO Evaluator. 
- fn rollout(&self, s: &G::S) -> i32 + fn rollout(&self, state: &G::S) -> i32 where + G: Sync, G::S: Clone, { - let mut rng = rand::thread_rng(); - let mut depth = self.options.max_rollout_depth; - let mut state = s.clone(); - let mut moves = Vec::new(); - let mut sign = 1; - loop { - if let Some(winner) = G::get_winner(&state) { - return match winner { - Winner::PlayerJustMoved => 1, - Winner::PlayerToMove => -1, - Winner::Draw => 0, - } * sign; - } - - if depth == 0 { - return 0; - } - - moves.clear(); - G::generate_moves(&state, &mut moves); - let m = moves.choose(&mut rng).unwrap(); - if let Some(new_state) = G::apply(&mut state, *m) { - state = new_state; - } - sign = -sign; - depth -= 1; - } + self.rollout_policy.as_ref().map(|p| p.rollout(&self.options, state)).unwrap_or_else(|| { + DumbRolloutPolicy:: { game_type: PhantomData }.rollout(&self.options, state) + }) } // Explore the tree, make a new node, rollout, backpropagate. fn simulate(&self, node: &Node, state: &mut G::S, mut force_rollout: bool) -> Option where + G: Sync, G::S: Clone, { if self.timeout.load(Relaxed) { From 1a88855ce479087b736312b623c8f3efc78fb55a Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 May 2023 11:32:12 -0700 Subject: [PATCH 145/162] mcts: implement virtual loss --- src/strategies/mcts.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 67f1c24..db5ad45 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -91,9 +91,15 @@ impl Node { win_ratio + exploration_score * (2.0 * log_parent_visits / visits).sqrt() } - fn update_stats(&self, result: i32) -> Option { + fn pre_update_stats(&self) { + // Use a technicque called virtual loss to assume we've lost any + // ongoing simulation to bias concurrent threads against exploring it. 
self.visits.fetch_add(1, SeqCst); - self.score.fetch_add(result, SeqCst); + self.score.fetch_add(-1, SeqCst); + } + + fn update_stats(&self, result: i32) -> Option { + self.score.fetch_add(result + 1, SeqCst); // Always return Some, as we aren't timed out. Some(result) } @@ -275,6 +281,8 @@ impl MonteCarloTreeSearch { if self.timeout.load(Relaxed) { return None; } + node.pre_update_stats(); + if force_rollout { return node.update_stats(self.rollout(state)); } From 94c5d940699d5bdc34e0b9ad1bf07cff95869169 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 16 May 2023 21:32:12 -0700 Subject: [PATCH 146/162] mcts: implement endgame terminal-state propagation. Apparently this simple idea was introduced in a paper called MCTS-Solver, and I don't get their simulation argument so I didn't implement it their way. My way is stupid simple. --- src/strategies/mcts.rs | 106 +++++++++++++++++++++++++++++++---------- src/util.rs | 16 ++++--- tests/ttt.rs | 4 +- 3 files changed, 91 insertions(+), 35 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index db5ad45..dcdb566 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -13,34 +13,35 @@ use std::sync::Arc; use std::thread; use std::time::{Duration, Instant}; +const WIN: i32 = i32::MAX; +// Make sure they negate to each other, unlike i32::MIN. +const LOSS: i32 = -WIN; + struct Node { // The Move to get from the parent to here. // Only None at the root. m: Option, visits: AtomicU32, // +1 for wins, -1 for losses, +0 for draws. - // From perspective of player to move. + // From perspective of the player that made this move. score: AtomicI32, + // Lazily populated if this node guarantees a particular end state. + // WIN for a guaranteed win, LOSS for a guaranteed loss. + // Not bothering with draws. + winner: AtomicI32, // Lazily populated. expansion: AtomicBox>, } struct NodeExpansion { - // Populated if this node is an end state. 
- winner: Option, children: Vec>, } fn new_expansion(state: &G::S) -> Box> { - let winner = G::get_winner(state); - let children = if winner.is_some() { - Vec::new() - } else { - let mut moves = Vec::new(); - G::generate_moves(state, &mut moves); - moves.into_iter().map(|m| Node::new(Some(m))).collect::>() - }; - Box::new(NodeExpansion { winner, children }) + let mut moves = Vec::new(); + G::generate_moves(state, &mut moves); + let children = moves.into_iter().map(|m| Node::new(Some(m))).collect::>(); + Box::new(NodeExpansion { children }) } impl Node { @@ -50,6 +51,7 @@ impl Node { expansion: AtomicBox::default(), visits: AtomicU32::new(0), score: AtomicI32::new(0), + winner: AtomicI32::new(0), } } @@ -81,6 +83,18 @@ impl Node { } fn uct_score(&self, exploration_score: f32, log_parent_visits: f32) -> f32 { + let winner = self.winner.load(Relaxed); + if winner < 0 { + // Large enough to be returned from best_move, smaller than any other value. + // This effectively ignores any moves that we've proved guarantee losses. + // The MCTS-Solver paper says not to do this, but I don't buy their argument. + // Those moves effectivey won't exist in our search, and we'll + // have to see if the remaining moves make the parent moves worthwhile. + return -1.0; + } + if winner > 0 { + return f32::INFINITY; + } let visits = self.visits.load(Relaxed) as f32; let score = self.score.load(Relaxed) as f32; if visits == 0.0 { @@ -99,7 +113,12 @@ impl Node { } fn update_stats(&self, result: i32) -> Option { - self.score.fetch_add(result + 1, SeqCst); + if result == WIN || result == LOSS { + self.winner.store(result, SeqCst); + } else { + // Adjust for virtual loss. + self.score.fetch_add(result + 1, SeqCst); + } // Always return Some, as we aren't timed out. 
Some(result) } @@ -180,9 +199,22 @@ pub trait RolloutPolicy { let mut sign = 1; loop { if let Some(winner) = Self::G::get_winner(&state) { + let first = depth == options.max_rollout_depth; return match winner { - Winner::PlayerJustMoved => 1, - Winner::PlayerToMove => -1, + Winner::PlayerJustMoved => { + if first { + WIN + } else { + 1 + } + } + Winner::PlayerToMove => { + if first { + LOSS + } else { + -1 + } + } Winner::Draw => 0, } * sign; } @@ -281,6 +313,10 @@ impl MonteCarloTreeSearch { if self.timeout.load(Relaxed) { return None; } + let winner = node.winner.load(Relaxed); + if winner != 0 { + return Some(winner); + } node.pre_update_stats(); if force_rollout { @@ -291,10 +327,17 @@ impl MonteCarloTreeSearch { Some(expansion) => expansion, None => { // This is a leaf node. - if node.visits.load(SeqCst) < self.options.rollouts_before_expanding { + if node.visits.load(SeqCst) <= self.options.rollouts_before_expanding { // Just rollout from here. return node.update_stats(self.rollout(state)); } else { + // Check for terminal node. + match G::get_winner(state) { + Some(Winner::PlayerJustMoved) => return node.update_stats(WIN), + Some(Winner::PlayerToMove) => return node.update_stats(LOSS), + Some(Winner::Draw) => return node.update_stats(0), + _ => {} + } // Expand this node, and force a rollout when we recurse. force_rollout = true; node.expansion.try_set(new_expansion::(state)) @@ -302,19 +345,30 @@ impl MonteCarloTreeSearch { } }; - if let Some(winner) = expansion.winner { - return node.update_stats(match winner { - Winner::PlayerJustMoved => 1, - Winner::PlayerToMove => -1, - Winner::Draw => 0, - }); - } - // Recurse. - let next = node.best_child(1.).unwrap(); + let next = match node.best_child(1.) { + Some(child) => child, + // TODO: Weird race condition? 
+ None => return Some(0), + }; let m = next.m.as_ref().unwrap(); let mut new = AppliedMove::::new(state, *m); - let result = -self.simulate(next, &mut new, force_rollout)?; + let child_result = self.simulate(next, &mut new, force_rollout)?; + + // Propagate up forced wins and losses. + let result = if child_result == WIN { + // Having a guaranteed win child makes you a loser parent. + LOSS + } else if child_result == LOSS { + // Having all guaranteed loser children makes you a winner parent. + if expansion.children.iter().all(|node| node.winner.load(Relaxed) == LOSS) { + WIN + } else { + -1 + } + } else { + -child_result + }; // Backpropagate. node.update_stats(result) diff --git a/src/util.rs b/src/util.rs index 37011e2..3bacc4b 100644 --- a/src/util.rs +++ b/src/util.rs @@ -57,7 +57,14 @@ where let mut state = G::S::default(); let mut strategies: [&mut dyn interface::Strategy; 2] = [s1, s2]; let mut s = 0; - while G::get_winner(&state).is_none() { + loop { + if let Some(winner) = G::get_winner(&state) { + return match winner { + interface::Winner::Draw => None, + interface::Winner::PlayerJustMoved => Some(1 - s), + interface::Winner::PlayerToMove => Some(s), + }; + } let strategy = &mut strategies[s]; match strategy.choose_move(&state) { Some(m) => { @@ -65,15 +72,10 @@ where state = new_state; } } - None => break, + None => return None, } s = 1 - s; } - match G::get_winner(&state).unwrap() { - interface::Winner::Draw => None, - interface::Winner::PlayerJustMoved => Some(1 - s), - interface::Winner::PlayerToMove => Some(s), - } } pub(crate) struct MovePool { diff --git a/tests/ttt.rs b/tests/ttt.rs index 4adc765..9d93ffd 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -30,9 +30,9 @@ fn test_ttt_negamax_vs_random_always_wins_or_draws() { #[test] fn test_ttt_mcts_vs_random_always_wins_or_draws() { let mut s1 = MonteCarloTreeSearch::new(MCTSOptions::default().with_num_threads(1)); - s1.set_max_rollouts(500); + s1.set_max_rollouts(100); let mut s2 = Random::new(); - 
for _ in 0..10 { + for _ in 0..50 { assert_ne!(battle_royale::(&mut s1, &mut s2), Some(1)); } } From 5ec9fb2aa6fa32d3866775a8ec6f339b41b3efef Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 17 May 2023 10:37:18 -0700 Subject: [PATCH 147/162] mcts: pick random best child more uniformly --- src/strategies/mcts.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index dcdb566..d5d2127 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -65,9 +65,14 @@ impl Node { let expansion = self.expansion.get()?; // Find a node, randomly chosen among the best scores. - // TODO: make it more uniformly random? let n = expansion.children.len(); - let mut i = rand::thread_rng().gen_range(0..n); + // To make the choice more uniformly random among the best moves, + // start at a random offset and stride by a random amount. + // The stride must be coprime with n, so pick from a set of large primes. + let mut rng = rand::thread_rng(); + let mut i = rng.gen_range(0..n); + static PRIMES: [usize; 8] = [14323, 30553, 50221, 51991, 53201, 64891, 72763, 74471]; + let stride = PRIMES.choose(&mut rng).unwrap(); let mut best_score = f32::NEG_INFINITY; let mut best_child = None; for _ in 0..n { @@ -77,7 +82,7 @@ impl Node { best_score = score; best_child = Some(&expansion.children[i]); } - i = (i + 1) % n; + i = (i + stride) % n; } best_child } From e2ffde1935030f654d39b7c26ae7453f5f497171 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 5 Jun 2023 09:19:02 -0700 Subject: [PATCH 148/162] mcts: add principal variation --- src/strategies/mcts.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index d5d2127..d82fb04 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,7 +1,7 @@ use super::super::interface::*; use super::super::util::AppliedMove; use super::sync_util::*; -use 
super::util::move_id; +use super::util::{move_id, pv_string}; use rand::rngs::ThreadRng; use rand::seq::SliceRandom; @@ -262,6 +262,7 @@ pub struct MonteCarloTreeSearch { max_time: Duration, timeout: Arc, rollout_policy: Option + Sync>>, + pv: Vec, game_type: PhantomData, } @@ -273,6 +274,7 @@ impl MonteCarloTreeSearch { max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), rollout_policy: None, + pv: Vec::new(), game_type: PhantomData, } } @@ -289,6 +291,7 @@ impl MonteCarloTreeSearch { max_time: Duration::from_secs(5), timeout: Arc::new(AtomicBool::new(false)), rollout_policy: Some(policy), + pv: Vec::new(), game_type: PhantomData, } } @@ -420,6 +423,14 @@ where } }); + // Compute PV. + self.pv.clear(); + let mut node = &*root; + while let Some(best) = node.best_child(0.0) { + self.pv.push(best.m.unwrap()); + node = best; + } + if self.options.verbose { let total_visits = root.visits.load(Relaxed); let duration = Instant::now().duration_since(start_time); @@ -450,6 +461,9 @@ where move_id::(&mut state, m) ); } + + // Dump PV. + eprintln!("Principal variation: {}", pv_string::(&self.pv[..], s)); } let exploration = 0.0; // Just get best node. @@ -468,4 +482,8 @@ where .saturating_pow(depth as u32) .saturating_mul(self.options.rollouts_before_expanding + 1); } + + fn principal_variation(&self) -> Vec { + self.pv.clone() + } } From 8e018874e68ff2112fc9a4d106d1cb8a6f7744d9 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 7 Jun 2023 15:54:53 -0700 Subject: [PATCH 149/162] Release 0.5.2 * Various MCTS algorithmic improvements. * Verbose mode for MCTS. * Customizable move randomizer for MCTS. 
--- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 31f5287..a21fa6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "minimax" -version = "0.5.1" -authors = [ "Samuel Fredrickson ", "Eric Roshan-Eisner " ] +version = "0.5.2" +authors = [ "Eric Roshan-Eisner ", "Samuel Fredrickson " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" edition = "2021" From 24ff09b5c1831dad5e49e1a2e8f501f07bada918 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 27 Nov 2023 11:18:37 -0800 Subject: [PATCH 150/162] Remove verbose-mode dependency on zobrist_hash. Leads to confusing crashes in MCTS. The hashes were probably not that useful for humans anyway. --- src/strategies/iterative.rs | 4 ++-- src/strategies/mcts.rs | 3 +-- src/strategies/util.rs | 10 +++------- src/strategies/ybw.rs | 2 +- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 5bacf75..a2ce194 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -687,7 +687,7 @@ where depth, interval.as_millis(), entry.bounds(), - move_id::(&mut s_clone, entry.best_move) + move_id::(&s_clone, entry.best_move) ); interval_start = end; } @@ -710,7 +710,7 @@ where depth, interval.as_millis(), entry.value_string(), - move_id::(&mut s_clone, best_move) + move_id::(&s_clone, best_move) ); } diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index d82fb04..2a88a18 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -450,7 +450,6 @@ where children.sort_by_key(|t| !t.0); // Dump stats about the top 10 nodes. - let mut state = s.clone(); for (visits, score, m) in children.into_iter().take(10) { // Normalized so all wins is 100%, all draws is 50%, and all losses is 0%. 
let win_rate = (score as f64 + visits as f64) / (visits as f64 * 2.0); @@ -458,7 +457,7 @@ where "{:>6} visits, {:.02}% wins: {}", visits, win_rate * 100.0, - move_id::(&mut state, m) + move_id::(s, m) ); } diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 0f659a0..c78387b 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -1,5 +1,4 @@ use super::super::interface::*; -use super::super::util::AppliedMove; // For values near winning and losing values, push them slightly closer to zero. // A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). @@ -26,12 +25,9 @@ pub(super) fn unclamp_value(value: Evaluation) -> Evaluation { } // Return a unique id for humans for this move. -pub(super) fn move_id(s: &mut ::S, m: Option<::M>) -> String { +pub(super) fn move_id(s: &::S, m: Option<::M>) -> String { if let Some(mov) = m { - G::notation(s, mov).unwrap_or_else(|| { - let new = AppliedMove::::new(s, mov); - format!("{:06x}", G::zobrist_hash(&new) & 0xffffff) - }) + G::notation(s, mov).unwrap_or("no notation impl".to_string()) } else { "none".to_string() } @@ -48,7 +44,7 @@ where if i > 0 { out.push_str("; "); } - out.push_str(move_id::(&mut state, Some(m)).as_str()); + out.push_str(move_id::(&state, Some(m)).as_str()); if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 0f91bd7..31ea981 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -365,7 +365,7 @@ where depth, interval.as_millis(), entry.value_string(), - move_id::(&mut state, best_move) + move_id::(&state, best_move) ); } From 32d49bb5a8fa198921949bfaa61d19740095474c Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Mon, 27 Nov 2023 11:27:12 -0800 Subject: [PATCH 151/162] Release 0.5.3 * Don't call zobrist_hash from MCTS's verbose mode. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a21fa6a..665739a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.5.2" +version = "0.5.3" authors = [ "Eric Roshan-Eisner ", "Samuel Fredrickson " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" From 87538e1f0499f6d38bbfd1c0d486ac883ec97194 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Fri, 8 Dec 2023 11:13:03 -0800 Subject: [PATCH 152/162] mcts: Factor out random_best into util library. --- src/strategies/mcts.rs | 27 ++++----------------------- src/strategies/util.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 2a88a18..80670b4 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,11 +1,10 @@ use super::super::interface::*; use super::super::util::AppliedMove; use super::sync_util::*; -use super::util::{move_id, pv_string}; +use super::util::{move_id, pv_string, random_best}; use rand::rngs::ThreadRng; use rand::seq::SliceRandom; -use rand::Rng; use std::marker::PhantomData; use std::sync::atomic::Ordering::{Relaxed, SeqCst}; use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32}; @@ -64,27 +63,9 @@ impl Node { } let expansion = self.expansion.get()?; - // Find a node, randomly chosen among the best scores. - let n = expansion.children.len(); - // To make the choice more uniformly random among the best moves, - // start at a random offset and stride by a random amount. - // The stride must be coprime with n, so pick from a set of large primes. 
- let mut rng = rand::thread_rng(); - let mut i = rng.gen_range(0..n); - static PRIMES: [usize; 8] = [14323, 30553, 50221, 51991, 53201, 64891, 72763, 74471]; - let stride = PRIMES.choose(&mut rng).unwrap(); - let mut best_score = f32::NEG_INFINITY; - let mut best_child = None; - for _ in 0..n { - let score = expansion.children[i].uct_score(exploration_score, log_visits); - debug_assert!(!score.is_nan()); - if score > best_score { - best_score = score; - best_child = Some(&expansion.children[i]); - } - i = (i + stride) % n; - } - best_child + random_best(expansion.children.as_slice(), |node| { + node.uct_score(exploration_score, log_visits) + }) } fn uct_score(&self, exploration_score: f32, log_parent_visits: f32) -> f32 { diff --git a/src/strategies/util.rs b/src/strategies/util.rs index c78387b..7fd842f 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -1,5 +1,7 @@ use super::super::interface::*; +use rand::Rng; + // For values near winning and losing values, push them slightly closer to zero. // A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). // A loss in 5 moves (WORST+5) will be chosen over a loss in 3 moves (WORST+3). @@ -85,3 +87,41 @@ impl ValueMove { (self.value, self.m) } } + +static PRIMES: [usize; 16] = [ + 14323, 18713, 19463, 30553, 33469, 45343, 50221, 51991, 53201, 56923, 64891, 72763, 74471, + 81647, 92581, 94693, +]; + +// Find and return the highest scoring element of the set. +// If multiple elements have the highest score, select one randomly. +// Constraints: +// - Don't call the scoring function more than once per element. +// - Select one uniformly, so that a run of high scores doesn't +// bias towards the one that scans first. +// - Don't shuffle the input or allocate a new array for shuffling. +// - Optimized for sets with <10k values. 
+pub(super) fn random_best f32>(set: &[T], score_fn: F) -> Option<&T> { + // To make the choice more uniformly random among the best moves, + // start at a random offset and stride by a random amount. + // The stride must be coprime with n, so pick from a set of 5 digit primes. + + let n = set.len(); + // Combine both random numbers into a single rng call. + let r = rand::thread_rng().gen_range(0..n * PRIMES.len()); + let mut i = r / PRIMES.len(); + let stride = PRIMES[r % PRIMES.len()]; + + let mut best_score = f32::NEG_INFINITY; + let mut best = None; + for _ in 0..n { + let score = score_fn(&set[i]); + debug_assert!(!score.is_nan()); + if score > best_score { + best_score = score; + best = Some(&set[i]); + } + i = (i + stride) % n; + } + best +} From 70dbd40b81ec5e2166eaa0bbbc3a874fac318a1b Mon Sep 17 00:00:00 2001 From: Lege19 <111765057+Lege19@users.noreply.github.com> Date: Sun, 14 Jan 2024 18:11:29 +0000 Subject: [PATCH 153/162] Removed Copy trait bound on Game::M to make it easier to implement games with arbitrarily complex moves where heap alocation is neccessary and therefore the copy trait cannot be easily implemented (if at all) --- examples/chess/src/main.rs | 6 +++--- examples/connect4.rs | 4 ++-- examples/mancala.rs | 6 +++--- examples/ttt.rs | 6 +++--- src/interface.rs | 12 ++++++------ src/lib.rs | 2 +- src/strategies/iterative.rs | 8 ++++---- src/strategies/mcts.rs | 8 ++++---- src/strategies/negamax.rs | 4 ++-- src/strategies/table.rs | 2 +- src/strategies/util.rs | 2 +- src/strategies/ybw.rs | 12 ++++++------ src/util.rs | 24 ++++++++++++------------ tests/strategies.rs | 20 ++++++++++---------- 14 files changed, 58 insertions(+), 58 deletions(-) diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs index 09c3018..84c8959 100644 --- a/examples/chess/src/main.rs +++ b/examples/chess/src/main.rs @@ -24,8 +24,8 @@ impl minimax::Game for Chess { } } - fn apply(b: &mut Board, m: ChessMove) -> Option { - Some(b.make_move_new(m)) + fn 
apply(b: &mut Board, m: &ChessMove) -> Option { + Some(b.make_move_new(*m)) } fn zobrist_hash(b: &Board) -> u64 { @@ -74,7 +74,7 @@ fn main() { while Chess::get_winner(&b).is_none() { println!("{}", b); match strategy.choose_move(&b) { - Some(m) => b = Chess::apply(&mut b, m).unwrap(), + Some(m) => b = Chess::apply(&mut b, &m).unwrap(), None => break, } } diff --git a/examples/connect4.rs b/examples/connect4.rs index 2d81670..ca43849 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -128,7 +128,7 @@ impl minimax::Game for Game { } } - fn apply(b: &mut Board, place: Place) -> Option { + fn apply(b: &mut Board, place: &Place) -> Option { let mut b = b.clone(); let col = (b.all_pieces >> place.col_shift()) & COL_MASK; let new_piece = (col + 1) << place.col_shift(); @@ -278,7 +278,7 @@ fn main() { Some(m) => { let color = if b.reds_move() { "Red" } else { "Yellow" }; println!("{} piece in column {}", color, m.col + 1); - b = self::Game::apply(&mut b, m).unwrap(); + b = self::Game::apply(&mut b, &m).unwrap(); } None => break, } diff --git a/examples/mancala.rs b/examples/mancala.rs index 5407810..f266c38 100644 --- a/examples/mancala.rs +++ b/examples/mancala.rs @@ -43,7 +43,7 @@ impl minimax::Game for Mancala { } } - fn apply(board: &mut Board, m: Move) -> Option { + fn apply(board: &mut Board, m: &Move) -> Option { let mut board = board.clone(); if board.skipped { board.skipped = false; @@ -53,7 +53,7 @@ impl minimax::Game for Mancala { // Grab the stones. let mut player = board.to_move as usize; - let mut i = m as usize; + let mut i = *m as usize; let mut stones = board.pits[player][i]; board.pits[player][i] = 0; // At the beginning of each iteration, it points at the previous pit. 
@@ -161,7 +161,7 @@ fn main() { while Mancala::get_winner(&board).is_none() { println!("{}", board); match strategy.choose_move(&board) { - Some(m) => board = Mancala::apply(&mut board, m).unwrap(), + Some(m) => board = Mancala::apply(&mut board, &m).unwrap(), None => break, } } diff --git a/examples/ttt.rs b/examples/ttt.rs index eaf7e3e..6b7cc67 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -152,12 +152,12 @@ impl minimax::Game for Game { } } - fn apply(b: &mut Board, m: Place) -> Option { + fn apply(b: &mut Board, m: &Place) -> Option { b.squares[m.i as usize] = b.to_move; b.to_move = b.to_move.invert(); None } - fn undo(b: &mut Board, m: Place) { + fn undo(b: &mut Board, m: &Place) { b.squares[m.i as usize] = Square::Empty; b.to_move = b.to_move.invert(); } @@ -247,7 +247,7 @@ fn main() { println!("{}", b); let ref mut strategy = strategies[s]; match strategy.choose_move(&mut b) { - Some(m) => self::Game::apply(&mut b, m), + Some(m) => self::Game::apply(&mut b, &m), None => break, }; s = 1 - s; diff --git a/src/interface.rs b/src/interface.rs index 73b9894..4c0f365 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -81,7 +81,7 @@ pub trait Game: Sized { /// The type of the game state. type S; /// The type of game moves. - type M: Copy; + type M; /// Generate moves at the given state. 
fn generate_moves(state: &Self::S, moves: &mut Vec); @@ -96,11 +96,11 @@ pub trait Game: Sized { /// ``` /// struct BigBoard([u8; 4096]); /// struct BigMove(u16); - /// fn apply(state: &mut BigBoard, m: BigMove) -> Option { + /// fn apply(state: &mut BigBoard, m: &BigMove) -> Option { /// state.0[m.0 as usize] += 1; /// None /// } - /// fn undo(state: &mut BigBoard, m: BigMove) { + /// fn undo(state: &mut BigBoard, m: &BigMove) { /// state.0[m.0 as usize] -= 1; /// } /// ``` @@ -109,14 +109,14 @@ pub trait Game: Sized { /// ``` /// struct SmallBoard(u64); /// struct SmallMove(u8); - /// fn apply(state: &mut SmallBoard, m: SmallMove) -> Option { + /// fn apply(state: &mut SmallBoard, m: &SmallMove) -> Option { /// Some(SmallBoard(state.0 | (1< Option; + fn apply(state: &mut Self::S, m: &Self::M) -> Option; /// Undo mutation done in apply, if any. - fn undo(_state: &mut Self::S, _m: Self::M) {} + fn undo(_state: &mut Self::S, _m: &Self::M) {} /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if diff --git a/src/lib.rs b/src/lib.rs index bc83adf..6e71f63 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,7 +42,7 @@ //! } //! } //! -//! fn apply(state: &mut War, tug: Tug) -> Option { +//! fn apply(state: &mut War, tug: &Tug) -> Option { //! Some(War(state.0 + tug.0)) //! } //! } diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index a2ce194..abb4ad8 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -375,7 +375,7 @@ where self.eval.evaluate(s) >= beta { // If we just pass and let the opponent play this position (at reduced depth), - let mut nulled = AppliedMove::::new(s, null_move); + let mut nulled = AppliedMove::::new(s, &null_move); let value = -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; // is the result still so good that we shouldn't bother with a full search? 
@@ -411,7 +411,7 @@ where let mut best = WORST_EVAL; for m in moves.iter() { - let mut new = AppliedMove::::new(s, *m); + let mut new = AppliedMove::::new(s, m); let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; best = max(best, value); alpha = max(alpha, value); @@ -480,7 +480,7 @@ where let mut best_move = moves[0]; let mut null_window = false; for &m in moves.iter() { - let mut new = AppliedMove::::new(s, m); + let mut new = AppliedMove::::new(s, &m); let value = if null_window { let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { @@ -534,7 +534,7 @@ where let mut alpha = WORST_EVAL; let beta = BEST_EVAL; for value_move in moves.iter_mut() { - let mut new = AppliedMove::::new(s, value_move.m); + let mut new = AppliedMove::::new(s, &value_move.m); let value = -self.negamax(&mut new, Some(value_move.m), depth - 1, -beta, -alpha)?; alpha = max(alpha, value); diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 80670b4..851ccc3 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -4,7 +4,7 @@ use super::sync_util::*; use super::util::{move_id, pv_string, random_best}; use rand::rngs::ThreadRng; -use rand::seq::SliceRandom; +use rand::Rng; use std::marker::PhantomData; use std::sync::atomic::Ordering::{Relaxed, SeqCst}; use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32}; @@ -211,7 +211,7 @@ pub trait RolloutPolicy { moves.clear(); let m = self.random_move(&mut state, &mut moves, &mut rng); - if let Some(new_state) = Self::G::apply(&mut state, m) { + if let Some(new_state) = Self::G::apply(&mut state, &m) { state = new_state; } sign = -sign; @@ -231,7 +231,7 @@ impl RolloutPolicy for DumbRolloutPolicy { rng: &mut ThreadRng, ) -> ::M { G::generate_moves(state, moves); - *moves.choose(rng).unwrap() + moves.swap_remove(rng.gen_range(0..moves.len())) } } @@ -341,7 +341,7 @@ impl MonteCarloTreeSearch { None => return Some(0), }; let m = 
next.m.as_ref().unwrap(); - let mut new = AppliedMove::::new(state, *m); + let mut new = AppliedMove::::new(state, m); let child_result = self.simulate(next, &mut new, force_rollout)?; // Propagate up forced wins and losses. diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index f030122..e98197d 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -49,7 +49,7 @@ impl Negamax { E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for m in moves.iter() { - let mut new = AppliedMove::::new(s, *m); + let mut new = AppliedMove::::new(s, m); let value = -self.negamax(&mut new, depth - 1, -beta, -alpha); best = max(best, value); alpha = max(alpha, value); @@ -85,7 +85,7 @@ where let mut s_clone = s.clone(); for &m in moves.iter() { // determine value for this move - let mut new = AppliedMove::::new(&mut s_clone, m); + let mut new = AppliedMove::::new(&mut s_clone, &m); let value = -self.negamax(&mut new, self.max_depth - 1, WORST_EVAL, -best); // Strictly better than any move found so far. if value > best { diff --git a/src/strategies/table.rs b/src/strategies/table.rs index abc6765..b8b0181 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -126,7 +126,7 @@ pub(super) trait Table { // equivalent upper and lower bounds. 
let m = entry.best_move.unwrap(); pv.push(m); - if let Some(new_state) = G::apply(&mut state, m) { + if let Some(new_state) = G::apply(&mut state, &m) { state = new_state; } hash = G::zobrist_hash(&state); diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 7fd842f..52953d3 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -47,7 +47,7 @@ where out.push_str("; "); } out.push_str(move_id::(&state, Some(m)).as_str()); - if let Some(new_state) = G::apply(&mut state, m) { + if let Some(new_state) = G::apply(&mut state, &m) { state = new_state; } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 31ea981..1712e3a 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -119,7 +119,7 @@ where self.eval.evaluate(s) >= beta { // If we just pass and let the opponent play this position (at reduced depth), - let mut nulled = AppliedMove::::new(s, null_move); + let mut nulled = AppliedMove::::new(s, &null_move); let value = -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; // is the result still so good that we shouldn't bother with a full search? @@ -156,7 +156,7 @@ where let mut best = WORST_EVAL; for &m in moves.iter() { - let mut new = AppliedMove::::new(s, m); + let mut new = AppliedMove::::new(s, &m); let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; best = max(best, value); alpha = max(alpha, value); @@ -226,7 +226,7 @@ where // Evaluate first move serially. let initial_value = { - let mut new = AppliedMove::::new(s, first_move); + let mut new = AppliedMove::::new(s, &first_move); -self.negamax(&mut new, Some(first_move), depth - 1, -beta, -alpha)? 
}; alpha = max(alpha, initial_value); @@ -239,7 +239,7 @@ where let mut best_move = first_move; let mut null_window = false; for &m in moves[1..].iter() { - let mut new = AppliedMove::::new(s, m); + let mut new = AppliedMove::::new(s, &m); let value = if null_window { let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { @@ -279,7 +279,7 @@ where } let mut state = s.clone(); - let mut new = AppliedMove::::new(&mut state, m); + let mut new = AppliedMove::::new(&mut state, &m); let value = if self.opts.null_window_search && initial_alpha > alpha_orig { // TODO: send reference to alpha as neg_beta to children. let probe = -self.negamax( @@ -492,7 +492,7 @@ where &self.thread_pool, ); let mut state = s.clone(); - if let Some(new_state) = E::G::apply(&mut state, best_move) { + if let Some(new_state) = E::G::apply(&mut state, &best_move) { state = new_state; } // Launch in threadpool asynchronously. diff --git a/src/util.rs b/src/util.rs index 3bacc4b..d4badb7 100644 --- a/src/util.rs +++ b/src/util.rs @@ -11,35 +11,35 @@ use rayon::prelude::*; use std::default::Default; use std::time::Instant; -pub(crate) struct AppliedMove<'a, G: Game> { +pub(crate) struct AppliedMove<'a, 'b, G: Game> { old: &'a mut ::S, new: Option<::S>, - m: ::M, + m: &'b ::M, } -impl<'a, G: Game> std::ops::Deref for AppliedMove<'a, G> { +impl<'a, 'b, G: Game> std::ops::Deref for AppliedMove<'a, 'b, G> { type Target = ::S; fn deref(&self) -> &::S { self.new.as_ref().unwrap_or(self.old) } } -impl<'a, G: Game> std::ops::DerefMut for AppliedMove<'a, G> { +impl<'a, 'b, G: Game> std::ops::DerefMut for AppliedMove<'a, 'b, G> { fn deref_mut(&mut self) -> &mut ::S { self.new.as_mut().unwrap_or(self.old) } } -impl<'a, G: Game> Drop for AppliedMove<'a, G> { +impl<'a, 'b, G: Game> Drop for AppliedMove<'a, 'b, G> { fn drop(&mut self) { - ::undo(self.old, self.m) + ::undo(self.old, &self.m) } } -impl<'a, G: Game> AppliedMove<'a, G> { - pub(crate) fn 
new(old: &'a mut ::S, m: ::M) -> Self { +impl<'a, 'b, G: Game> AppliedMove<'a, 'b, G> { + pub(crate) fn new(old: &'a mut ::S, m: &'b ::M) -> Self { let new = G::apply(old, m); - AppliedMove { old, new, m } + AppliedMove { old, new, m: m } } } @@ -68,7 +68,7 @@ where let strategy = &mut strategies[s]; match strategy.choose_move(&state) { Some(m) => { - if let Some(new_state) = G::apply(&mut state, m) { + if let Some(new_state) = G::apply(&mut state, &m) { state = new_state; } } @@ -122,7 +122,7 @@ where // Single-thread recurse. let mut count = 0; for &m in moves.iter() { - let mut new = AppliedMove::::new(state, m); + let mut new = AppliedMove::::new(state, &m); count += perft_recurse::(pool, &mut new, depth - 1, single_thread_cutoff); } count @@ -134,7 +134,7 @@ where .map(|m| { let mut state = state.clone(); let mut pool2 = MovePool::::default(); - if let Some(new_state) = G::apply(&mut state, *m) { + if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } perft_recurse::(&mut pool2, &mut state, depth - 1, single_thread_cutoff) diff --git a/tests/strategies.rs b/tests/strategies.rs index bc290df..54850e4 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -42,7 +42,7 @@ impl PlainNegamax { E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for &m in moves.iter() { - let mut new = E::G::apply(s, m).unwrap(); + let mut new = E::G::apply(s, &m).unwrap(); let value = -self.negamax(&mut new, depth - 1); best = max(best, value); } @@ -63,7 +63,7 @@ where let mut best_value = WORST_EVAL; let mut s = s.clone(); for &m in moves.iter() { - let mut new = E::G::apply(&mut s, m).unwrap(); + let mut new = E::G::apply(&mut s, &m).unwrap(); let value = -self.negamax(&mut new, self.depth - 1); if value == best_value { self.best_moves.push(m); @@ -107,7 +107,7 @@ fn generate_random_state(depth: u8) -> connect4::Board { let mut moves = Vec::new(); connect4::Game::generate_moves(&b, &mut moves); let m = moves.choose(&mut rng).unwrap(); - let 
next = connect4::Game::apply(&mut b, *m).unwrap(); + let next = connect4::Game::apply(&mut b, m).unwrap(); if connect4::Game::get_winner(&next).is_some() { // Oops, undo and try again on the next iter. } else { @@ -120,13 +120,13 @@ fn generate_random_state(depth: u8) -> connect4::Board { #[test] fn test_winning_position() { let mut b = connect4::Board::default(); - b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); assert_eq!(Some(Winner::PlayerJustMoved), connect4::Game::get_winner(&b)); // Make sure none of the strategies die when given a winning position. From 9c9964a6e9d2c5fee7c54de453c4b251934adf8f Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 1 Jan 2025 20:20:41 -0800 Subject: [PATCH 154/162] Consistent uses of semicolons in verbose logs. 
--- src/strategies/iterative.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index abb4ad8..b67595b 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -683,7 +683,7 @@ where let end = Instant::now(); let interval = end - interval_start; eprintln!( - "Iterative aspiration depth{:>2} took{:>5}ms; bounds{:>5} bestmove={}", + "Iterative aspiration depth{:>2} took{:>5}ms; bounds{:>5}; bestmove={}", depth, interval.as_millis(), entry.bounds(), @@ -706,7 +706,7 @@ where if self.opts.verbose { let interval = Instant::now() - interval_start; eprintln!( - "Iterative fullsearch depth{:>2} took{:>5}ms; value{:>6} bestmove={}", + "Iterative fullsearch depth{:>2} took{:>5}ms; value{:>6}; bestmove={}", depth, interval.as_millis(), entry.value_string(), From 3846d5b26788de402dba74f3d0626d76bebd0e3e Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 2 Jan 2025 09:15:26 -0800 Subject: [PATCH 155/162] Back out "Removed Copy trait bound on Game::M" No followup interest to bring this to any of the strategies, and I still think games should be heavily encouraged to find ways to make moves super small and cheap. This backs out commit 70dbd40b81ec5e2166eaa0bbbc3a874fac318a1b. 
--- examples/chess/src/main.rs | 6 +++--- examples/connect4.rs | 4 ++-- examples/mancala.rs | 6 +++--- examples/ttt.rs | 6 +++--- src/interface.rs | 12 ++++++------ src/lib.rs | 2 +- src/strategies/iterative.rs | 8 ++++---- src/strategies/mcts.rs | 8 ++++---- src/strategies/negamax.rs | 4 ++-- src/strategies/table.rs | 2 +- src/strategies/util.rs | 2 +- src/strategies/ybw.rs | 12 ++++++------ src/util.rs | 24 ++++++++++++------------ tests/strategies.rs | 20 ++++++++++---------- 14 files changed, 58 insertions(+), 58 deletions(-) diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs index 84c8959..09c3018 100644 --- a/examples/chess/src/main.rs +++ b/examples/chess/src/main.rs @@ -24,8 +24,8 @@ impl minimax::Game for Chess { } } - fn apply(b: &mut Board, m: &ChessMove) -> Option { - Some(b.make_move_new(*m)) + fn apply(b: &mut Board, m: ChessMove) -> Option { + Some(b.make_move_new(m)) } fn zobrist_hash(b: &Board) -> u64 { @@ -74,7 +74,7 @@ fn main() { while Chess::get_winner(&b).is_none() { println!("{}", b); match strategy.choose_move(&b) { - Some(m) => b = Chess::apply(&mut b, &m).unwrap(), + Some(m) => b = Chess::apply(&mut b, m).unwrap(), None => break, } } diff --git a/examples/connect4.rs b/examples/connect4.rs index ca43849..2d81670 100644 --- a/examples/connect4.rs +++ b/examples/connect4.rs @@ -128,7 +128,7 @@ impl minimax::Game for Game { } } - fn apply(b: &mut Board, place: &Place) -> Option { + fn apply(b: &mut Board, place: Place) -> Option { let mut b = b.clone(); let col = (b.all_pieces >> place.col_shift()) & COL_MASK; let new_piece = (col + 1) << place.col_shift(); @@ -278,7 +278,7 @@ fn main() { Some(m) => { let color = if b.reds_move() { "Red" } else { "Yellow" }; println!("{} piece in column {}", color, m.col + 1); - b = self::Game::apply(&mut b, &m).unwrap(); + b = self::Game::apply(&mut b, m).unwrap(); } None => break, } diff --git a/examples/mancala.rs b/examples/mancala.rs index f266c38..5407810 100644 --- 
a/examples/mancala.rs +++ b/examples/mancala.rs @@ -43,7 +43,7 @@ impl minimax::Game for Mancala { } } - fn apply(board: &mut Board, m: &Move) -> Option { + fn apply(board: &mut Board, m: Move) -> Option { let mut board = board.clone(); if board.skipped { board.skipped = false; @@ -53,7 +53,7 @@ impl minimax::Game for Mancala { // Grab the stones. let mut player = board.to_move as usize; - let mut i = *m as usize; + let mut i = m as usize; let mut stones = board.pits[player][i]; board.pits[player][i] = 0; // At the beginning of each iteration, it points at the previous pit. @@ -161,7 +161,7 @@ fn main() { while Mancala::get_winner(&board).is_none() { println!("{}", board); match strategy.choose_move(&board) { - Some(m) => board = Mancala::apply(&mut board, &m).unwrap(), + Some(m) => board = Mancala::apply(&mut board, m).unwrap(), None => break, } } diff --git a/examples/ttt.rs b/examples/ttt.rs index 6b7cc67..eaf7e3e 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -152,12 +152,12 @@ impl minimax::Game for Game { } } - fn apply(b: &mut Board, m: &Place) -> Option { + fn apply(b: &mut Board, m: Place) -> Option { b.squares[m.i as usize] = b.to_move; b.to_move = b.to_move.invert(); None } - fn undo(b: &mut Board, m: &Place) { + fn undo(b: &mut Board, m: Place) { b.squares[m.i as usize] = Square::Empty; b.to_move = b.to_move.invert(); } @@ -247,7 +247,7 @@ fn main() { println!("{}", b); let ref mut strategy = strategies[s]; match strategy.choose_move(&mut b) { - Some(m) => self::Game::apply(&mut b, &m), + Some(m) => self::Game::apply(&mut b, m), None => break, }; s = 1 - s; diff --git a/src/interface.rs b/src/interface.rs index 4c0f365..73b9894 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -81,7 +81,7 @@ pub trait Game: Sized { /// The type of the game state. type S; /// The type of game moves. - type M; + type M: Copy; /// Generate moves at the given state. 
fn generate_moves(state: &Self::S, moves: &mut Vec); @@ -96,11 +96,11 @@ pub trait Game: Sized { /// ``` /// struct BigBoard([u8; 4096]); /// struct BigMove(u16); - /// fn apply(state: &mut BigBoard, m: &BigMove) -> Option { + /// fn apply(state: &mut BigBoard, m: BigMove) -> Option { /// state.0[m.0 as usize] += 1; /// None /// } - /// fn undo(state: &mut BigBoard, m: &BigMove) { + /// fn undo(state: &mut BigBoard, m: BigMove) { /// state.0[m.0 as usize] -= 1; /// } /// ``` @@ -109,14 +109,14 @@ pub trait Game: Sized { /// ``` /// struct SmallBoard(u64); /// struct SmallMove(u8); - /// fn apply(state: &mut SmallBoard, m: &SmallMove) -> Option { + /// fn apply(state: &mut SmallBoard, m: SmallMove) -> Option { /// Some(SmallBoard(state.0 | (1< Option; + fn apply(state: &mut Self::S, m: Self::M) -> Option; /// Undo mutation done in apply, if any. - fn undo(_state: &mut Self::S, _m: &Self::M) {} + fn undo(_state: &mut Self::S, _m: Self::M) {} /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if diff --git a/src/lib.rs b/src/lib.rs index 6e71f63..bc83adf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,7 +42,7 @@ //! } //! } //! -//! fn apply(state: &mut War, tug: &Tug) -> Option { +//! fn apply(state: &mut War, tug: Tug) -> Option { //! Some(War(state.0 + tug.0)) //! } //! } diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index b67595b..7f72796 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -375,7 +375,7 @@ where self.eval.evaluate(s) >= beta { // If we just pass and let the opponent play this position (at reduced depth), - let mut nulled = AppliedMove::::new(s, &null_move); + let mut nulled = AppliedMove::::new(s, null_move); let value = -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; // is the result still so good that we shouldn't bother with a full search? 
@@ -411,7 +411,7 @@ where let mut best = WORST_EVAL; for m in moves.iter() { - let mut new = AppliedMove::::new(s, m); + let mut new = AppliedMove::::new(s, *m); let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; best = max(best, value); alpha = max(alpha, value); @@ -480,7 +480,7 @@ where let mut best_move = moves[0]; let mut null_window = false; for &m in moves.iter() { - let mut new = AppliedMove::::new(s, &m); + let mut new = AppliedMove::::new(s, m); let value = if null_window { let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { @@ -534,7 +534,7 @@ where let mut alpha = WORST_EVAL; let beta = BEST_EVAL; for value_move in moves.iter_mut() { - let mut new = AppliedMove::::new(s, &value_move.m); + let mut new = AppliedMove::::new(s, value_move.m); let value = -self.negamax(&mut new, Some(value_move.m), depth - 1, -beta, -alpha)?; alpha = max(alpha, value); diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 851ccc3..80670b4 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -4,7 +4,7 @@ use super::sync_util::*; use super::util::{move_id, pv_string, random_best}; use rand::rngs::ThreadRng; -use rand::Rng; +use rand::seq::SliceRandom; use std::marker::PhantomData; use std::sync::atomic::Ordering::{Relaxed, SeqCst}; use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32}; @@ -211,7 +211,7 @@ pub trait RolloutPolicy { moves.clear(); let m = self.random_move(&mut state, &mut moves, &mut rng); - if let Some(new_state) = Self::G::apply(&mut state, &m) { + if let Some(new_state) = Self::G::apply(&mut state, m) { state = new_state; } sign = -sign; @@ -231,7 +231,7 @@ impl RolloutPolicy for DumbRolloutPolicy { rng: &mut ThreadRng, ) -> ::M { G::generate_moves(state, moves); - moves.swap_remove(rng.gen_range(0..moves.len())) + *moves.choose(rng).unwrap() } } @@ -341,7 +341,7 @@ impl MonteCarloTreeSearch { None => return Some(0), }; let m = 
next.m.as_ref().unwrap(); - let mut new = AppliedMove::::new(state, m); + let mut new = AppliedMove::::new(state, *m); let child_result = self.simulate(next, &mut new, force_rollout)?; // Propagate up forced wins and losses. diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index e98197d..f030122 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -49,7 +49,7 @@ impl Negamax { E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for m in moves.iter() { - let mut new = AppliedMove::::new(s, m); + let mut new = AppliedMove::::new(s, *m); let value = -self.negamax(&mut new, depth - 1, -beta, -alpha); best = max(best, value); alpha = max(alpha, value); @@ -85,7 +85,7 @@ where let mut s_clone = s.clone(); for &m in moves.iter() { // determine value for this move - let mut new = AppliedMove::::new(&mut s_clone, &m); + let mut new = AppliedMove::::new(&mut s_clone, m); let value = -self.negamax(&mut new, self.max_depth - 1, WORST_EVAL, -best); // Strictly better than any move found so far. if value > best { diff --git a/src/strategies/table.rs b/src/strategies/table.rs index b8b0181..abc6765 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -126,7 +126,7 @@ pub(super) trait Table { // equivalent upper and lower bounds. 
let m = entry.best_move.unwrap(); pv.push(m); - if let Some(new_state) = G::apply(&mut state, &m) { + if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } hash = G::zobrist_hash(&state); diff --git a/src/strategies/util.rs b/src/strategies/util.rs index 52953d3..7fd842f 100644 --- a/src/strategies/util.rs +++ b/src/strategies/util.rs @@ -47,7 +47,7 @@ where out.push_str("; "); } out.push_str(move_id::(&state, Some(m)).as_str()); - if let Some(new_state) = G::apply(&mut state, &m) { + if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } } diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 1712e3a..31ea981 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -119,7 +119,7 @@ where self.eval.evaluate(s) >= beta { // If we just pass and let the opponent play this position (at reduced depth), - let mut nulled = AppliedMove::::new(s, &null_move); + let mut nulled = AppliedMove::::new(s, null_move); let value = -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; // is the result still so good that we shouldn't bother with a full search? @@ -156,7 +156,7 @@ where let mut best = WORST_EVAL; for &m in moves.iter() { - let mut new = AppliedMove::::new(s, &m); + let mut new = AppliedMove::::new(s, m); let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; best = max(best, value); alpha = max(alpha, value); @@ -226,7 +226,7 @@ where // Evaluate first move serially. let initial_value = { - let mut new = AppliedMove::::new(s, &first_move); + let mut new = AppliedMove::::new(s, first_move); -self.negamax(&mut new, Some(first_move), depth - 1, -beta, -alpha)? 
}; alpha = max(alpha, initial_value); @@ -239,7 +239,7 @@ where let mut best_move = first_move; let mut null_window = false; for &m in moves[1..].iter() { - let mut new = AppliedMove::::new(s, &m); + let mut new = AppliedMove::::new(s, m); let value = if null_window { let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; if probe > alpha && probe < beta { @@ -279,7 +279,7 @@ where } let mut state = s.clone(); - let mut new = AppliedMove::::new(&mut state, &m); + let mut new = AppliedMove::::new(&mut state, m); let value = if self.opts.null_window_search && initial_alpha > alpha_orig { // TODO: send reference to alpha as neg_beta to children. let probe = -self.negamax( @@ -492,7 +492,7 @@ where &self.thread_pool, ); let mut state = s.clone(); - if let Some(new_state) = E::G::apply(&mut state, &best_move) { + if let Some(new_state) = E::G::apply(&mut state, best_move) { state = new_state; } // Launch in threadpool asynchronously. diff --git a/src/util.rs b/src/util.rs index d4badb7..3bacc4b 100644 --- a/src/util.rs +++ b/src/util.rs @@ -11,35 +11,35 @@ use rayon::prelude::*; use std::default::Default; use std::time::Instant; -pub(crate) struct AppliedMove<'a, 'b, G: Game> { +pub(crate) struct AppliedMove<'a, G: Game> { old: &'a mut ::S, new: Option<::S>, - m: &'b ::M, + m: ::M, } -impl<'a, 'b, G: Game> std::ops::Deref for AppliedMove<'a, 'b, G> { +impl<'a, G: Game> std::ops::Deref for AppliedMove<'a, G> { type Target = ::S; fn deref(&self) -> &::S { self.new.as_ref().unwrap_or(self.old) } } -impl<'a, 'b, G: Game> std::ops::DerefMut for AppliedMove<'a, 'b, G> { +impl<'a, G: Game> std::ops::DerefMut for AppliedMove<'a, G> { fn deref_mut(&mut self) -> &mut ::S { self.new.as_mut().unwrap_or(self.old) } } -impl<'a, 'b, G: Game> Drop for AppliedMove<'a, 'b, G> { +impl<'a, G: Game> Drop for AppliedMove<'a, G> { fn drop(&mut self) { - ::undo(self.old, &self.m) + ::undo(self.old, self.m) } } -impl<'a, 'b, G: Game> AppliedMove<'a, 'b, G> { - pub(crate) 
fn new(old: &'a mut ::S, m: &'b ::M) -> Self { +impl<'a, G: Game> AppliedMove<'a, G> { + pub(crate) fn new(old: &'a mut ::S, m: ::M) -> Self { let new = G::apply(old, m); - AppliedMove { old, new, m: m } + AppliedMove { old, new, m } } } @@ -68,7 +68,7 @@ where let strategy = &mut strategies[s]; match strategy.choose_move(&state) { Some(m) => { - if let Some(new_state) = G::apply(&mut state, &m) { + if let Some(new_state) = G::apply(&mut state, m) { state = new_state; } } @@ -122,7 +122,7 @@ where // Single-thread recurse. let mut count = 0; for &m in moves.iter() { - let mut new = AppliedMove::::new(state, &m); + let mut new = AppliedMove::::new(state, m); count += perft_recurse::(pool, &mut new, depth - 1, single_thread_cutoff); } count @@ -134,7 +134,7 @@ where .map(|m| { let mut state = state.clone(); let mut pool2 = MovePool::::default(); - if let Some(new_state) = G::apply(&mut state, m) { + if let Some(new_state) = G::apply(&mut state, *m) { state = new_state; } perft_recurse::(&mut pool2, &mut state, depth - 1, single_thread_cutoff) diff --git a/tests/strategies.rs b/tests/strategies.rs index 54850e4..bc290df 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -42,7 +42,7 @@ impl PlainNegamax { E::G::generate_moves(s, &mut moves); let mut best = WORST_EVAL; for &m in moves.iter() { - let mut new = E::G::apply(s, &m).unwrap(); + let mut new = E::G::apply(s, m).unwrap(); let value = -self.negamax(&mut new, depth - 1); best = max(best, value); } @@ -63,7 +63,7 @@ where let mut best_value = WORST_EVAL; let mut s = s.clone(); for &m in moves.iter() { - let mut new = E::G::apply(&mut s, &m).unwrap(); + let mut new = E::G::apply(&mut s, m).unwrap(); let value = -self.negamax(&mut new, self.depth - 1); if value == best_value { self.best_moves.push(m); @@ -107,7 +107,7 @@ fn generate_random_state(depth: u8) -> connect4::Board { let mut moves = Vec::new(); connect4::Game::generate_moves(&b, &mut moves); let m = moves.choose(&mut rng).unwrap(); - let next = 
connect4::Game::apply(&mut b, m).unwrap(); + let next = connect4::Game::apply(&mut b, *m).unwrap(); if connect4::Game::get_winner(&next).is_some() { // Oops, undo and try again on the next iter. } else { @@ -120,13 +120,13 @@ fn generate_random_state(depth: u8) -> connect4::Board { #[test] fn test_winning_position() { let mut b = connect4::Board::default(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 3 }).unwrap(); - b = connect4::Game::apply(&mut b, &connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); assert_eq!(Some(Winner::PlayerJustMoved), connect4::Game::get_winner(&b)); // Make sure none of the strategies die when given a winning position. From 4b89a9c2c8203f62be5d5912eb04e6b9e1e2fd33 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 2 Jan 2025 11:26:38 -0800 Subject: [PATCH 156/162] Fix new clippy lints. 
--- src/util.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/util.rs b/src/util.rs index 3bacc4b..11f19cd 100644 --- a/src/util.rs +++ b/src/util.rs @@ -17,20 +17,20 @@ pub(crate) struct AppliedMove<'a, G: Game> { m: ::M, } -impl<'a, G: Game> std::ops::Deref for AppliedMove<'a, G> { +impl std::ops::Deref for AppliedMove<'_, G> { type Target = ::S; fn deref(&self) -> &::S { self.new.as_ref().unwrap_or(self.old) } } -impl<'a, G: Game> std::ops::DerefMut for AppliedMove<'a, G> { +impl std::ops::DerefMut for AppliedMove<'_, G> { fn deref_mut(&mut self) -> &mut ::S { self.new.as_mut().unwrap_or(self.old) } } -impl<'a, G: Game> Drop for AppliedMove<'a, G> { +impl Drop for AppliedMove<'_, G> { fn drop(&mut self) { ::undo(self.old, self.m) } From 92031c2ecb20cc4f564e0dc4a283120ddf1c0b47 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 2 Jan 2025 11:22:30 -0800 Subject: [PATCH 157/162] Respect the move ordering in parallel search. Countermoves et al will all be checked first across all threads. --- src/strategies/sync_util.rs | 14 +++++++++++++- src/strategies/ybw.rs | 4 ++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs index 2dcae59..e7522c3 100644 --- a/src/strategies/sync_util.rs +++ b/src/strategies/sync_util.rs @@ -1,9 +1,11 @@ use std::ops::{Deref, DerefMut}; -use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; use std::sync::Arc; use std::thread::{sleep, spawn}; use std::time::Duration; +use rayon::prelude::*; + pub(super) fn timeout_signal(dur: Duration) -> Arc { // Theoretically we could include an async runtime to do this and use // fewer threads, but the stdlib implementation is only a few lines... 
@@ -147,3 +149,13 @@ impl DerefMut for CachePadded { &mut self.value } } + +/// Iterate through the given array in parallel, but whenever any +/// thread starts a new item it gets the next one in line. +pub(super) fn par_iter_in_order(array: &[T]) -> impl ParallelIterator { + let index = AtomicUsize::new(0); + (0..array.len()) + .into_par_iter() + .with_max_len(1) + .map(move |_| &array[index.fetch_add(1, Ordering::SeqCst)]) +} diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 31ea981..83a7265 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -10,7 +10,7 @@ extern crate rayon; use super::super::interface::*; use super::super::util::*; use super::iterative::{IterativeOptions, Stats}; -use super::sync_util::{timeout_signal, CachePadded, ThreadLocal}; +use super::sync_util::{par_iter_in_order, timeout_signal, CachePadded, ThreadLocal}; use super::table::*; use super::util::*; @@ -271,7 +271,7 @@ where let alpha = AtomicI16::new(alpha); let best_move = Mutex::new(ValueMove::new(initial_value, first_move)); // Parallel search - let result = moves[1..].par_iter().with_max_len(1).try_for_each(|&m| -> Option<()> { + let result = par_iter_in_order(&moves[1..]).try_for_each(|&m| -> Option<()> { // Check to see if we're cancelled by another branch. let initial_alpha = alpha.load(Ordering::SeqCst); if initial_alpha >= beta { From ff79d162f5b5e72f5ed298dbbfa682f89e3f0395 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 2 Jan 2025 12:04:10 -0800 Subject: [PATCH 158/162] Rename util.rs to common.rs in src/strategies/. This keeps me from getting confused when I have two util.rs files open in my editor.
--- src/strategies/{util.rs => common.rs} | 0 src/strategies/iterative.rs | 2 +- src/strategies/mcts.rs | 2 +- src/strategies/mod.rs | 2 +- src/strategies/negamax.rs | 2 +- src/strategies/table.rs | 2 +- src/strategies/ybw.rs | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename src/strategies/{util.rs => common.rs} (100%) diff --git a/src/strategies/util.rs b/src/strategies/common.rs similarity index 100% rename from src/strategies/util.rs rename to src/strategies/common.rs diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index 7f72796..ecc0903 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -6,10 +6,10 @@ use super::super::interface::*; use super::super::util::*; +use super::common::*; #[cfg(not(target_arch = "wasm32"))] use super::sync_util::timeout_signal; use super::table::*; -use super::util::*; use instant::Instant; use rand::prelude::SliceRandom; diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 80670b4..584c549 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -1,7 +1,7 @@ use super::super::interface::*; use super::super::util::AppliedMove; +use super::common::{move_id, pv_string, random_best}; use super::sync_util::*; -use super::util::{move_id, pv_string, random_best}; use rand::rngs::ThreadRng; use rand::seq::SliceRandom; diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 6021134..c7a292d 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -8,7 +8,7 @@ pub mod random; #[cfg(not(target_arch = "wasm32"))] pub mod ybw; +mod common; #[cfg(not(target_arch = "wasm32"))] mod sync_util; mod table; -mod util; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index f030122..99a7335 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -5,7 +5,7 @@ use super::super::interface::*; use super::super::util::*; -use super::util::*; +use super::common::*; use rand::seq::SliceRandom; use std::cmp::max; diff --git 
a/src/strategies/table.rs b/src/strategies/table.rs index abc6765..3ff9f1f 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -1,4 +1,4 @@ -use super::util::{move_to_front, unclamp_value}; +use super::common::{move_to_front, unclamp_value}; use crate::interface::*; use std::cmp::{max, min}; use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 83a7265..126d678 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -9,10 +9,10 @@ extern crate rayon; use super::super::interface::*; use super::super::util::*; +use super::common::*; use super::iterative::{IterativeOptions, Stats}; use super::sync_util::{par_iter_in_order, timeout_signal, CachePadded, ThreadLocal}; use super::table::*; -use super::util::*; use rayon::prelude::*; use std::cmp::max; From 5d030a081819801e3a74a5ad5d7fbace8c667e36 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Thu, 2 Jan 2025 18:49:39 -0800 Subject: [PATCH 159/162] Decay old countermoves values faster. Otherwise high values can stick around several moves after they were relevant. --- src/strategies/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/strategies/table.rs b/src/strategies/table.rs index 3ff9f1f..76d8363 100644 --- a/src/strategies/table.rs +++ b/src/strategies/table.rs @@ -436,6 +436,6 @@ where } // Partially degrade old values, to bias towards new data. - self.history_table.iter_mut().for_each(|n| *n >>= 1); + self.history_table.iter_mut().for_each(|n| *n >>= 3); } } From 1c094fdae8123408fcd8f6f726ef5d419245df50 Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Wed, 19 Feb 2025 11:16:13 -0800 Subject: [PATCH 160/162] Release 0.5.4 * Updates to countermoves reordering. * Upgrade rand dependency. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 665739a..82a44c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimax" -version = "0.5.3" +version = "0.5.4" authors = [ "Eric Roshan-Eisner ", "Samuel Fredrickson " ] description = "Generic implementations of Minimax." documentation = "https://docs.rs/minimax" From bb04bc3ca4623545fbe3c31603335b9138ba50ca Mon Sep 17 00:00:00 2001 From: Eric Roshan-Eisner Date: Tue, 18 Feb 2025 22:39:12 -0800 Subject: [PATCH 161/162] Update rand to 0.9 Alternatives considered: I also looked into a few different tiny crates that just do basic RNG with less overhead and cruft. Unfortunately most of them default to a const seed in wasm32-unknown-unknown, so I'd need to do my own getrandom all over the place. Also whatever crate I use will be in the public API of MCTS rollouts, and fancy MCTS rollouts may actually want the fancy distributions that rand provides. While here I sprinkled Send in some places and removed holding on to any ThreadRngs so that all Strategies can be Send types. Switching the rollouts API to SmallRng is technically an API change but any actual usage (do I have users?) will very likely be textually compatible. 
--- Cargo.toml | 4 ++-- src/strategies/common.rs | 2 +- src/strategies/iterative.rs | 2 +- src/strategies/mcts.rs | 15 ++++++++------- src/strategies/negamax.rs | 2 +- src/strategies/random.rs | 7 +++---- tests/strategies.rs | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 82a44c8..8df0610 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,14 +12,14 @@ license = "MIT" [dependencies] instant = { version = "0.1", features = ["wasm-bindgen"] } -rand = "0.8" +rand = "0.9" [target.'cfg(not(target_arch="wasm32"))'.dependencies] num_cpus = "1.0" rayon = "^1.5" [target.'cfg(target_arch="wasm32")'.dependencies] -getrandom = { version = "0.2", features = ["js"]} +getrandom = { version = "0.3", features = ["wasm_js"]} [dev-dependencies] bencher = "0.1.5" diff --git a/src/strategies/common.rs b/src/strategies/common.rs index 7fd842f..f5c1499 100644 --- a/src/strategies/common.rs +++ b/src/strategies/common.rs @@ -108,7 +108,7 @@ pub(super) fn random_best f32>(set: &[T], score_fn: F) -> Option let n = set.len(); // Combine both random numbers into a single rng call. - let r = rand::thread_rng().gen_range(0..n * PRIMES.len()); + let r = rand::rng().random_range(0..n * PRIMES.len()); let mut i = r / PRIMES.len(); let stride = PRIMES[r % PRIMES.len()]; diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs index ecc0903..86189ca 100644 --- a/src/strategies/iterative.rs +++ b/src/strategies/iterative.rs @@ -655,7 +655,7 @@ where let mut moves = Vec::new(); E::G::generate_moves(&s_clone, &mut moves); // Start in a random order. - moves.shuffle(&mut rand::thread_rng()); + moves.shuffle(&mut rand::rng()); let mut moves = moves.into_iter().map(|m| ValueMove::new(0, m)).collect::>(); // Start at 1 or 2 to hit the max depth. 
diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs index 584c549..a8a5161 100644 --- a/src/strategies/mcts.rs +++ b/src/strategies/mcts.rs @@ -3,8 +3,9 @@ use super::super::util::AppliedMove; use super::common::{move_id, pv_string, random_best}; use super::sync_util::*; -use rand::rngs::ThreadRng; -use rand::seq::SliceRandom; +use rand::prelude::IndexedRandom; +use rand::rngs::SmallRng; +use rand::SeedableRng; use std::marker::PhantomData; use std::sync::atomic::Ordering::{Relaxed, SeqCst}; use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32}; @@ -170,7 +171,7 @@ pub trait RolloutPolicy { /// The provided move vec is for scratch space. fn random_move( &self, state: &mut ::S, move_scratch: &mut Vec<::M>, - rng: &mut ThreadRng, + rng: &mut SmallRng, ) -> ::M; /// Implementation of a rollout over many random moves. Not needed to be overridden. @@ -178,7 +179,7 @@ pub trait RolloutPolicy { where ::S: Clone, { - let mut rng = rand::thread_rng(); + let mut rng = SmallRng::from_rng(&mut rand::rng()); let mut depth = options.max_rollout_depth; let mut state = state.clone(); let mut moves = Vec::new(); @@ -228,7 +229,7 @@ impl RolloutPolicy for DumbRolloutPolicy { type G = G; fn random_move( &self, state: &mut ::S, moves: &mut Vec<::M>, - rng: &mut ThreadRng, + rng: &mut SmallRng, ) -> ::M { G::generate_moves(state, moves); *moves.choose(rng).unwrap() @@ -242,7 +243,7 @@ pub struct MonteCarloTreeSearch { max_rollouts: u32, max_time: Duration, timeout: Arc, - rollout_policy: Option + Sync>>, + rollout_policy: Option + Send + Sync>>, pv: Vec, game_type: PhantomData, } @@ -264,7 +265,7 @@ impl MonteCarloTreeSearch { /// random move generation to prefer certain kinds of moves, always choose /// winning moves, etc. 
pub fn new_with_policy( - options: MCTSOptions, policy: Box + Sync>, + options: MCTSOptions, policy: Box + Send + Sync>, ) -> Self { Self { options, diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index 99a7335..878eb0b 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -22,7 +22,7 @@ impl Negamax { Negamax { max_depth: depth, move_pool: MovePool::<_>::default(), - rng: rand::thread_rng(), + rng: rand::rng(), prev_value: 0, eval, } diff --git a/src/strategies/random.rs b/src/strategies/random.rs index b657303..974d614 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -1,17 +1,16 @@ //! A strategy that randomly chooses a move, for use in tests. use super::super::interface::*; -use rand::seq::SliceRandom; +use rand::prelude::IndexedRandom; use std::marker::PhantomData; pub struct Random { - rng: rand::rngs::ThreadRng, game_type: PhantomData, } impl Random { pub fn new() -> Self { - Self { rng: rand::thread_rng(), game_type: PhantomData } + Self { game_type: PhantomData } } } @@ -28,6 +27,6 @@ where fn choose_move(&mut self, s: &G::S) -> Option { let mut moves = Vec::new(); G::generate_moves(s, &mut moves); - moves.choose(&mut self.rng).copied() + moves.choose(&mut rand::rng()).copied() } } diff --git a/tests/strategies.rs b/tests/strategies.rs index bc290df..e64de1b 100644 --- a/tests/strategies.rs +++ b/tests/strategies.rs @@ -10,7 +10,7 @@ extern crate rand; mod connect4; use minimax::*; -use rand::seq::SliceRandom; +use rand::prelude::IndexedRandom; use std::cmp::max; use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; @@ -101,7 +101,7 @@ impl minimax::Evaluator for RandomEvaluator { } fn generate_random_state(depth: u8) -> connect4::Board { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut b = connect4::Board::default(); for _ in 0..depth { let mut moves = Vec::new(); From 00320655cf63dc75aee4d2bbfdc75b663e8a9b91 Mon Sep 17 00:00:00 2001 From: Rohan Sarvaria 
Date: Thu, 9 Oct 2025 21:16:59 -0400 Subject: [PATCH 162/162] feat(stats): iterative deepening statistics table via parallel reads --- src/strategies/sync_util.rs | 10 +++++-- src/strategies/ybw.rs | 52 ++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs index e7522c3..cc0b3f2 100644 --- a/src/strategies/sync_util.rs +++ b/src/strategies/sync_util.rs @@ -101,9 +101,15 @@ impl ThreadLocal { } // With a &mut self, no other threads can be using it. - pub(super) fn do_all(&mut self, f: F) { + pub(super) fn do_all_mut(&mut self, f: F) { self.locals.iter_mut().for_each(f); } + + // This is extremely suspicious, but parallel-reads mapping each threadlocal + // into an output element. Good for interior mutability tasks. + pub(super) fn do_all(&self, f: F) { + self.locals.iter().for_each(f); + } } #[test] @@ -114,7 +120,7 @@ fn test_threadlocal() { let count = 100000; (0..count).into_par_iter().for_each(|_| tls.local_do(|x| *x += 1)); let mut sum = 0; - tls.do_all(|x| sum += *x); + tls.do_all_mut(|x| sum += *x); assert_eq!(sum, count); let result = std::panic::catch_unwind(|| { diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs index 126d678..e29be18 100644 --- a/src/strategies/ybw.rs +++ b/src/strategies/ybw.rs @@ -324,13 +324,12 @@ where fn iterative_search( &self, mut state: ::S, max_depth: u8, background: bool, - ) -> Option<(::M, Evaluation)> { + ) -> Option<(::M, Evaluation, u8)> { self.table.concurrent_advance_generation(); let root_hash = E::G::zobrist_hash(&state); let mut best_move = None; let mut best_value = 0; let mut interval_start; - let mut pv = String::new(); let mut depth = max_depth % self.opts.step_increment; if depth == 0 { @@ -359,8 +358,14 @@ where if self.opts.verbose && !background { let interval = Instant::now() - interval_start; + let mut stats = Stats::default(); + self.stats.do_all(|s| stats.add(s)); + let mbf = 
stats.total_generated_moves as f64 / stats.total_generate_move_calls as f64; + let ebf = (stats.nodes_explored as f64).powf(((depth as f64) + 1.0).recip()); + let nps = stats.nodes_explored as f64 / interval.as_secs_f64(); + let count = stats.nodes_explored; eprintln!( - "Parallel (threads={}) depth{:>2} took{:>5}ms; returned{:>5}; bestmove {}", + "Parallel (threads={}) depth={:>2}, took={:>6}ms; returned{:>5}; bestmove {}; MBF={mbf:>6.1} EBF={ebf:>6.1}; NPS={nps:>9.0}; total={count:>11}", self.par_opts.num_threads(), depth, interval.as_millis(), @@ -373,28 +378,14 @@ where let mut pv_moves = Vec::new(); self.table.populate_pv::(&mut pv_moves, &state); self.pv.lock().unwrap().clone_from(&pv_moves); - pv = pv_string::(&pv_moves[..], &state); if unclamp_value(entry.value).abs() == BEST_EVAL { break; } } - if self.opts.verbose && !background { - eprintln!("Principal variation: {}", pv); - } - best_move.map(|m| (m, best_value)) + best_move.map(|m| (m, best_value, depth)) } } -fn pretty_stats(stats: &Stats, start: Instant) -> String { - let mean_branching_factor = - stats.total_generated_moves as f64 / stats.total_generate_move_calls as f64; - let throughput = (stats.nodes_explored) as f64 / (Instant::now() - start).as_secs_f64(); - format!( - "Explored {} nodes. 
MBF={:.1}\n{} nodes/sec", - stats.nodes_explored, mean_branching_factor, throughput as usize - ) -} - pub struct ParallelSearch { max_depth: u8, max_time: Duration, @@ -434,6 +425,22 @@ impl ParallelSearch { pub fn root_value(&self) -> Evaluation { unclamp_value(self.prev_value) } + + fn pretty_stats(&self, stats: &Stats, start: Instant, minimax: &ParallelNegamaxer, depth: u8) -> String { + let interval = Instant::now() - start; + let mbf = + stats.total_generated_moves as f64 / stats.total_generate_move_calls as f64; + let ebf = + (stats.nodes_explored as f64).powf((depth as f64 + 1.0).recip()); + let nps = (stats.nodes_explored) as f64 / interval.as_secs_f64(); + let count = stats.nodes_explored; + format!( + "Parallel (threads={}) depth={:>2}, took={:>6.0}ms; MBF={mbf:>6.1} EBF={ebf:>6.1}; NPS={nps:>9.0}; total={count:>11}", + minimax.par_opts.num_threads(), + depth, + interval.as_secs_f64()*1000.0, + ) + } } impl Strategy for ParallelSearch @@ -466,15 +473,18 @@ where &self.thread_pool, ); // Launch in threadpool and wait for result. - let value_move = self + let value_move_depth = self .thread_pool .install(|| negamaxer.iterative_search(s.clone(), self.max_depth, false)); self.principal_variation = negamaxer.principal_variation(); let mut stats = Stats::default(); - negamaxer.stats.do_all(|local| stats.add(local)); + negamaxer.stats.do_all_mut(|local| stats.add(local)); if self.opts.verbose { - eprintln!("{}", pretty_stats(&stats, start_time)); + eprintln!("{}", "——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————"); + eprintln!("{}", self.pretty_stats(&stats, start_time, &negamaxer, value_move_depth.map_or(0, |v| v.2))); + eprintln!("principal variation: {}", pv_string::(&self.principal_variation(), s)); } + let value_move = value_move_depth.map(|v| (v.0, v.1)); value_move }?; self.prev_value = value;