diff --git a/.gitignore b/.gitignore index fa8d85a..5481574 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ Cargo.lock target + +**/*~ diff --git a/.project b/.project deleted file mode 100644 index 84544fd..0000000 --- a/.project +++ /dev/null @@ -1,17 +0,0 @@ - - - minimax - - - - - - com.github.rustdt.ide.core.Builder - - - - - - com.github.rustdt.ide.core.nature - - diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..9d7ffed --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,3 @@ +edition = "2021" +fn_params_layout = "Compressed" +use_small_heuristics = "Max" diff --git a/Cargo.toml b/Cargo.toml index cea9a90..8df0610 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,16 +1,32 @@ [package] name = "minimax" -version = "0.0.2" -authors = [ "Samuel Fredrickson " ] +version = "0.5.4" +authors = [ "Eric Roshan-Eisner ", "Samuel Fredrickson " ] description = "Generic implementations of Minimax." -documentation = "http://kinghajj.github.io/doc/minimax/" -repository = "http://github.com/kinghajj/minimax-rs.git" +documentation = "https://docs.rs/minimax" +edition = "2021" +repository = "https://github.com/edre/minimax-rs" readme = "README.md" keywords = ["ai", "game", "minimax", "negamax"] license = "MIT" [dependencies] -rand = "0.3.*" +instant = { version = "0.1", features = ["wasm-bindgen"] } +rand = "0.9" + +[target.'cfg(not(target_arch="wasm32"))'.dependencies] +num_cpus = "1.0" +rayon = "^1.5" + +[target.'cfg(target_arch="wasm32")'.dependencies] +getrandom = { version = "0.3", features = ["wasm_js"]} + +[dev-dependencies] +bencher = "0.1.5" + +[[bench]] +name = "negamax" +harness = false [profile.test] opt-level = 3 diff --git a/README.md b/README.md index 79ce1bf..8cfefaa 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # minimax-rs - Generic implementations of Minimax in Rust. 
-[![Build Status](https://travis-ci.org/kinghajj/minimax-rs.svg?branch=master)](https://travis-ci.org/kinghajj/minimax-rs) [![Crates.io](https://img.shields.io/crates/v/minimax.svg)](https://crates.io/crates/minimax) +[![Build Status](https://api.travis-ci.com/edre/minimax-rs.svg?branch=master)](https://travis-ci.com/github/edre/minimax-rs) +[![Crates.io](https://img.shields.io/crates/v/minimax.svg)](https://crates.io/crates/minimax) +[![Documentation](https://docs.rs/minimax/badge.svg)](https://docs.rs/minimax) ## About @@ -10,33 +12,15 @@ This library provides interfaces that describe: 2. methods of evaluating particular game states for a player; and 3. strategies for choosing moves for a player. -The eventual goal is to have multiple proper strategies, so that any combination -of evaluators and strategies can be tested against each other. Currently, only -a basic alpha-beta pruning Negamax strategy is implemented. +This crate implements multiple different strategies, so that any combination of +custom evaluators and strategies can be tested against each other. These include +single- and multi-threaded algorithms using alpha-beta pruning, iterative +deepening, and transposition tables. There is also a basic implementation of +multi-threaded Monte Carlo Tree Search, which does not require writing an +evaluator. ## Example -The `ttt` module contains an implementation of Tic-Tac-Toe, demonstrating how to -use the game and evaluation interfaces. `test` shows how to use strategies. 
- -## License - - Copyright (c) 2015 Samuel Fredrickson - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. +The `ttt` and `connect4` modules contain implementations of Tic-Tac-Toe and +Connect Four, demonstrating how to use the game and evaluation interfaces. +`test` shows how to use strategies. 
diff --git a/benches/negamax.rs b/benches/negamax.rs index e4ba293..51d2082 100644 --- a/benches/negamax.rs +++ b/benches/negamax.rs @@ -1,56 +1,47 @@ -#![feature(test)] +#[macro_use] +extern crate bencher; extern crate minimax; -extern crate test; -use test::Bencher; -use minimax::*; - -#[derive(Clone)] -pub struct Board; - -#[derive(Copy, Clone)] -pub struct Place; +#[path = "../examples/connect4.rs"] +mod connect4; -pub struct Eval; - -pub struct Noop; +use bencher::Bencher; +use minimax::*; -impl Move for Place { - type G = Noop; - fn apply(&self, _: &mut Board) { - } - fn undo(&self, _: &mut Board) { - } +fn bench_negamax(b: &mut Bencher) { + let board = connect4::Board::default(); + b.iter(|| { + let mut s = Negamax::new(connect4::BasicEvaluator::default(), 5); + let m = s.choose_move(&board); + assert!(m.is_some()); + }); } -impl Game for Noop { - type S = Board; - type M = Place; - - fn generate_moves(_: &Board, _: Player, ms: &mut [Option]) -> usize { - const NUM_MOVES: usize = 4; - for m in ms.iter_mut().take(NUM_MOVES) { - *m = Some(Place); - } - ms[NUM_MOVES] = None; - NUM_MOVES - } - - fn get_winner(_: &Board) -> Option { - None - } +fn bench_iterative(b: &mut Bencher) { + let board = connect4::Board::default(); + b.iter(|| { + let mut s = IterativeSearch::new( + connect4::BasicEvaluator::default(), + IterativeOptions::new().with_table_byte_size(32_000), + ); + s.set_max_depth(5); + let m = s.choose_move(&board); + assert!(m.is_some()); + }); } -impl Evaluator for Eval { - type G = Noop; - - fn evaluate(_: &Board, _: Option) -> Evaluation { - Evaluation::Score(0) - } +fn bench_parallel(b: &mut Bencher) { + let board = connect4::Board::default(); + b.iter(|| { + let mut s = ParallelSearch::new( + connect4::BasicEvaluator::default(), + IterativeOptions::new().with_table_byte_size(32_000), + ParallelOptions::new(), + ); + s.set_max_depth(5); + let m = s.choose_move(&board); + assert!(m.is_some()); + }); } -#[bench] -fn bench_negamax(b: &mut Bencher) { - 
let board = Board; - let mut s = Negamax::::new(Options { max_depth: 10 }); - b.iter(|| s.choose_move(&board, Player::Computer)); -} +benchmark_group!(benches, bench_negamax, bench_iterative, bench_parallel); +benchmark_main!(benches); diff --git a/examples/chess/Cargo.toml b/examples/chess/Cargo.toml new file mode 100644 index 0000000..f2a27f9 --- /dev/null +++ b/examples/chess/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "chess-minimax-example" +version = "0.1.0" +edition = "2021" + +[dependencies] +chess = "3.2" +minimax = { path = "../.." } \ No newline at end of file diff --git a/examples/chess/src/main.rs b/examples/chess/src/main.rs new file mode 100644 index 0000000..09c3018 --- /dev/null +++ b/examples/chess/src/main.rs @@ -0,0 +1,82 @@ +extern crate chess; +extern crate minimax; + +use chess::{Board, BoardStatus, ChessMove, MoveGen}; +use minimax::{Game, Strategy}; + +struct Chess; + +impl minimax::Game for Chess { + type S = Board; + type M = ChessMove; + + fn generate_moves(b: &Board, moves: &mut Vec) { + for m in MoveGen::new_legal(b) { + moves.push(m); + } + } + + fn get_winner(b: &Board) -> Option { + match b.status() { + BoardStatus::Ongoing => None, + BoardStatus::Stalemate => Some(minimax::Winner::Draw), + BoardStatus::Checkmate => Some(minimax::Winner::PlayerJustMoved), + } + } + + fn apply(b: &mut Board, m: ChessMove) -> Option { + Some(b.make_move_new(m)) + } + + fn zobrist_hash(b: &Board) -> u64 { + b.get_hash() + } + + fn notation(_b: &Board, m: ChessMove) -> Option { + Some(format!("{}", m)) + } +} + +#[derive(Default)] +struct Evaluator; + +impl minimax::Evaluator for Evaluator { + type G = Chess; + fn evaluate(&self, board: &Board) -> minimax::Evaluation { + let mut score = 0; + for sq in 0..64 { + let sq = unsafe { chess::Square::new(sq) }; + if let Some(piece) = board.piece_on(sq) { + let value = match piece { + chess::Piece::Pawn => 1, + chess::Piece::Knight => 3, + chess::Piece::Bishop => 3, + chess::Piece::Rook => 5, + 
chess::Piece::Queen => 9, + chess::Piece::King => 0, + }; + if board.color_on(sq).unwrap() == board.side_to_move() { + score += value; + } else { + score -= value; + } + } + } + score + } +} + +fn main() { + let mut b = Board::default(); + let opts = minimax::IterativeOptions::new().verbose(); + let mut strategy = minimax::IterativeSearch::new(Evaluator::default(), opts); + strategy.set_timeout(std::time::Duration::from_secs(1)); + while Chess::get_winner(&b).is_none() { + println!("{}", b); + match strategy.choose_move(&b) { + Some(m) => b = Chess::apply(&mut b, m).unwrap(), + None => break, + } + } + println!("Checkmate {:?}", b.side_to_move()); +} diff --git a/examples/connect4.rs b/examples/connect4.rs new file mode 100644 index 0000000..2d81670 --- /dev/null +++ b/examples/connect4.rs @@ -0,0 +1,392 @@ +//! A definition of the game Connect Four using the library, for use in tests and benchmarks. +#![allow(dead_code)] + +extern crate minimax; + +use std::default::Default; +use std::fmt::{Display, Formatter, Result}; + +#[derive(Clone)] +pub struct Board { + // Some bitboard ideas from http://blog.gamesolver.org/solving-connect-four/06-bitboard/ + /* bit order example: + * Leaves a blank row on top. + * 5 12 19 26 33 40 47 + * 4 11 18 25 32 39 46 + * 3 10 17 24 31 38 45 + * 2 9 16 23 30 37 44 + * 1 8 15 22 29 36 43 + * 0 7 14 21 28 35 42 + */ + all_pieces: u64, + pub pieces_to_move: u64, + num_moves: u8, + hash: u64, +} + +const NUM_COLS: u32 = 7; +const NUM_ROWS: u32 = 6; +const HEIGHT: u32 = NUM_ROWS + 1; +const COL_MASK: u64 = (1 << NUM_ROWS) - 1; + +impl Board { + fn reds_move(&self) -> bool { + self.num_moves & 1 == 0 + } + + pub fn pieces_just_moved(&self) -> u64 { + self.all_pieces ^ self.pieces_to_move + } + + fn update_hash(&mut self, piece: u64) { + // Lookup the hash for this position and this color. 
+ let position = piece.trailing_zeros() as usize; + let color = self.num_moves as usize & 1; + self.hash ^= HASHES[(position << 1) | color]; + } +} + +impl Default for Board { + fn default() -> Board { + Board { all_pieces: 0, pieces_to_move: 0, num_moves: 0, hash: 0 } + } +} + +impl Display for Board { + fn fmt(&self, f: &mut Formatter) -> Result { + let red_pieces = + if self.reds_move() { self.pieces_to_move } else { self.pieces_just_moved() }; + let yellow_pieces = + if self.reds_move() { self.pieces_just_moved() } else { self.pieces_to_move }; + for row in (0..6).rev() { + for col in 0..7 { + write!( + f, + "{}", + if red_pieces >> (row + col * HEIGHT) & 1 != 0 { + '\u{1F534}' + } else if yellow_pieces >> (row + col * HEIGHT) & 1 != 0 { + '\u{1F7E1}' + } else { + '\u{25ef}' + } + )?; + } + writeln!(f, "")?; + } + Ok(()) + } +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct Place { + pub col: u8, +} + +impl Place { + fn col_shift(&self) -> u32 { + self.col as u32 * HEIGHT + } +} + +pub struct Game; + +impl minimax::Game for Game { + type S = Board; + type M = Place; + + fn generate_moves(b: &Board, moves: &mut Vec) { + let mut cols = b.all_pieces; + for i in 0..NUM_COLS { + if cols & COL_MASK < COL_MASK { + moves.push(Place { col: i as u8 }); + } + cols >>= HEIGHT; + } + } + + fn get_winner(b: &Board) -> Option { + // Position of pieces for the player that just moved. + let pieces = b.pieces_just_moved(); + + // Detect pairs of two pieces in a row, then pairs of two pairs in a + // row. + let matches = |shift| -> bool { + let pairs = pieces & (pieces >> shift); + pairs & (pairs >> 2 * shift) != 0 + }; + + if matches(1) || matches(HEIGHT) || matches(HEIGHT + 1) || matches(HEIGHT - 1) { + return Some(minimax::Winner::PlayerJustMoved); + } + + // Full board with no winner. 
+ if b.num_moves as u32 == NUM_ROWS * NUM_COLS { + Some(minimax::Winner::Draw) + } else { + None + } + } + + fn apply(b: &mut Board, place: Place) -> Option { + let mut b = b.clone(); + let col = (b.all_pieces >> place.col_shift()) & COL_MASK; + let new_piece = (col + 1) << place.col_shift(); + // Swap colors + b.pieces_to_move ^= b.all_pieces; + b.all_pieces |= new_piece; + b.num_moves += 1; + b.update_hash(new_piece); + Some(b) + } + + fn zobrist_hash(b: &Board) -> u64 { + b.hash + } +} + +pub struct DumbEvaluator; + +impl minimax::Evaluator for DumbEvaluator { + type G = Game; + fn evaluate(&self, _: &Board) -> minimax::Evaluation { + 0 + } +} + +impl Board { + // Return bitmap of all open locations that would complete a four in a row for the given player. + fn find_fourth_moves(&self, pieces: u64) -> u64 { + let mut all = self.all_pieces; + // Mark the fake row on top as full to prevent wrapping around. + let mut top_row = COL_MASK + 1; + for _ in 0..NUM_COLS { + all |= top_row; + top_row <<= HEIGHT; + } + + let matches = |shift| -> u64 { + let pairs = pieces & (pieces >> shift); // Pairs of this color. + let singles = (pieces >> shift) & !all | (pieces << shift) & !all; // One of this color and one empty. + (pairs >> shift * 2) & singles | (pairs << shift * 2) & singles + }; + + // Vertical + matches(1) | + // Horizontal + matches(HEIGHT) | + // Diagonal + matches(HEIGHT+1) | + // Other diagonal + matches(HEIGHT-1) + } +} + +#[derive(Clone)] +pub struct BasicEvaluator; + +impl Default for BasicEvaluator { + fn default() -> Self { + Self {} + } +} + +impl minimax::Evaluator for BasicEvaluator { + type G = Game; + fn evaluate(&self, b: &Board) -> minimax::Evaluation { + let player_pieces = b.pieces_to_move; + let opponent_pieces = b.pieces_just_moved(); + let mut player_wins = b.find_fourth_moves(player_pieces); + let mut opponent_wins = b.find_fourth_moves(opponent_pieces); + + let mut score = 0; + // Bonus points for moves in the middle columns. 
+ for col in 2..5 { + score += + ((player_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() as minimax::Evaluation; + score -= ((opponent_pieces >> (HEIGHT * col)) & COL_MASK).count_ones() + as minimax::Evaluation; + } + + // Count columns that cause immediate win. + // Count columns that then allow immediate win. + let mut all = b.all_pieces; + for _ in 0..NUM_COLS { + let next_move = (all & COL_MASK) + 1; + if next_move > COL_MASK { + continue; + } + if next_move & player_wins != 0 { + score += 10; + } + if next_move & opponent_wins != 0 { + score -= 10; + } + let afterwards_move = next_move << 1; + if afterwards_move & player_wins != 0 { + score += 5; + } + if afterwards_move & opponent_wins != 0 { + score -= 5; + } + + all >>= HEIGHT; + player_wins >>= HEIGHT; + opponent_wins >>= HEIGHT; + } + + score + } +} + +fn main() { + use minimax::*; + + let mut b = Board::default(); + + if std::env::args().any(|arg| arg == "perft") { + perft::(&mut b, 10, false); + return; + } + + let mut dumb = IterativeSearch::new( + BasicEvaluator::default(), + IterativeOptions::new().with_double_step_increment(), + ); + dumb.set_max_depth(8); + + let opts = + IterativeOptions::new().with_table_byte_size(64_000_000).with_double_step_increment(); + let mut iterative = + IterativeSearch::new(BasicEvaluator::default(), opts.clone().with_aspiration_window(5)); + iterative.set_max_depth(12); + let mut parallel = ParallelSearch::new(BasicEvaluator::default(), opts, ParallelOptions::new()); + parallel.set_max_depth(12); + + let mut strategies: [&mut dyn Strategy; 3] = + [&mut dumb, &mut iterative, &mut parallel]; + + if std::env::args().any(|arg| arg == "parallel") { + strategies.swap(1, 2); + } + + let mut s = 0; + while self::Game::get_winner(&b).is_none() { + println!("{}", b); + let ref mut strategy = strategies[s]; + match strategy.choose_move(&mut b) { + Some(m) => { + let color = if b.reds_move() { "Red" } else { "Yellow" }; + println!("{} piece in column {}", color, m.col + 1); + b 
= self::Game::apply(&mut b, m).unwrap(); + } + None => break, + } + s = 1 - s; + } + println!("{}", b); +} + +// There aren't that many positions per color, so just encode the zobrist hash statically. +const HASHES: [u64; 100] = [ + 0x73399349585d196e, + 0xe512dc15f0da3dd1, + 0x4fbc1b81c6197db2, + 0x16b5034810111a66, + 0xa9a9d0183e33c311, + 0xbb9d7bdea0dad2d6, + 0x089d9205c11ca5c7, + 0x18d9db91aa689617, + 0x1336123120681e34, + 0xc902e6c0bd6ef6bf, + 0x16985ba0916238c1, + 0x6144c3f2ab9f6dc4, + 0xf24b4842de919a02, + 0xdd6dd35ba0c150a1, + 0x369a9de8ec3676e3, + 0x2c366fb99be782d8, + 0x24d3231335c0dbd6, + 0x14048390c56e38f1, + 0x55dfbc820f635186, + 0x0dc98cb87372d5fa, + 0xe3098781582027b4, + 0x088158ec8202adca, + 0x231df62376ad9514, + 0xd3747fad069caeae, + 0x4e4f26cb41d0c620, + 0x06d0e37cd11b8f1c, + 0xed33865175fbbdd2, + 0xf1f52569481f0d8f, + 0xfb6fd5c922e2127c, + 0x6778bb0eba4a6649, + 0xe35b853bdac1210b, + 0x465a67712ec749a2, + 0x83b1fd78e576fe72, + 0xe84827644a5ccbe6, + 0x89095321ce8e4d03, + 0x298c529eecb0ec36, + 0xe9dcc93d77cb49ad, + 0xa7446daa1834c04a, + 0x93f15442b434d550, + 0x7f2a36dbf1cbce3f, + 0x03365a42023b02b3, + 0x101d87e850689cda, + 0x113b31e2760d2050, + 0x9cdb7b7394e1b0ae, + 0xd04530b3b7daf3a3, + 0x717e67aed6b4ffc9, + 0x4ae564a3f3ca8b03, + 0x07c50a4d89351437, + 0x7f3b32175e5f37e0, + 0x6e3599203bb50cd7, + 0xcfe2319d4a6cfa73, + 0xdbc6a398b10f5c3b, + 0x9c1ba28ae655bbd1, + 0x9dc87a426451941a, + 0x691e618354a55cb5, + 0x61b8cabbc575f4ba, + 0x7e6f31f1818593d4, + 0x9fa69e1ef4df8a9b, + 0x5a9dc96c3cb18d8f, + 0x65c4e9c0f40114f5, + 0x4e66504db2d937cf, + 0x4ebd6d097fe1e256, + 0xfb10983e639af6b1, + 0xcfbed7bd4032a59a, + 0x1f47f6a95049fe4f, + 0xbd461d202b879890, + 0xfc050073b0c74cbe, + 0x2923526a1f7092e9, + 0x0b1d30bb6b960bc7, + 0x632d12e4a9d0229d, + 0x8d4ffd6ab37c6bfd, + 0x561e36b8609b94ec, + 0x32e8482c9e7ed80c, + 0xaf62a119227b1029, + 0x62cb2a585410c311, + 0x7df3aeef90e1a0cb, + 0xe6d5a176f8a1b180, + 0x156e5162d8f2bef8, + 0xee84c58f5ebbe811, + 0xd32a1b4e24038bac, + 
0xeaa1dbdbdd7731f7, + 0xedb554afd3d07cc6, + 0xbc789444317d4d05, + 0x0e23ce8f3d581fcd, + 0xacb498d4569249a8, + 0x843fb2519edc9f5a, + 0xe222f0eb79436809, + 0x7a88365f089ae80b, + 0x2a0f08694d7ea84d, + 0x09cad4dbfc990fa2, + 0xfe5f27499de6b4f8, + 0x3d8ed8ab1d44997f, + 0x2af64deca431f644, + 0xf2712b5274180c36, + 0x30eeae3a821bf86c, + 0x31c921831f06ad2f, + 0x40683ff11655cd2f, + 0xb78183a74cd6cb03, + 0xde9e15a6f99bda2f, + 0xa5293988641edb9b, +]; diff --git a/examples/mancala.rs b/examples/mancala.rs new file mode 100644 index 0000000..5407810 --- /dev/null +++ b/examples/mancala.rs @@ -0,0 +1,186 @@ +extern crate minimax; + +use crate::minimax::{Game, Strategy}; +use std::fmt; + +#[derive(Copy, Clone)] +struct Board { + // First index by player. + // Next index by pit, counting down from 6 to 1 for the pits in play. + // Pit zero is that player's store. + // If I wanted to be crazy bit twiddly I could put these in a pair of u64s and shift stuff around. + pits: [[u8; 7]; 2], + skipped: bool, + // u1 of pits player index. + to_move: bool, +} + +impl Default for Board { + fn default() -> Board { + Board { pits: [[0, 4, 4, 4, 4, 4, 4]; 2], skipped: false, to_move: false } + } +} + +// 1-6 means play from that pit. +// 0 means pass (because of being skipped). +type Move = u8; + +struct Mancala; + +impl minimax::Game for Mancala { + type S = Board; + type M = Move; + + fn generate_moves(board: &Board, moves: &mut Vec) { + if board.skipped { + moves.push(0); + return; + } + for i in 1..7 { + if board.pits[board.to_move as usize][i] > 0 { + moves.push(i as Move); + } + } + } + + fn apply(board: &mut Board, m: Move) -> Option { + let mut board = board.clone(); + if board.skipped { + board.skipped = false; + board.to_move = !board.to_move; + return Some(board); + } + + // Grab the stones. 
+ let mut player = board.to_move as usize; + let mut i = m as usize; + let mut stones = board.pits[player][i]; + board.pits[player][i] = 0; + // At the beginning of each iteration, it points at the previous pit. + while stones > 0 { + if player == board.to_move as usize && i == 0 { + i = 6; + player ^= 1; + } else if player != board.to_move as usize && i == 1 { + i = 6; + player ^= 1; + } else { + i -= 1; + } + board.pits[player][i] += 1; + stones -= 1; + } + + if player == board.to_move as usize { + if i == 0 { + // End condition: ends in own bowl + board.skipped = true; + } else if board.pits[player][i] == 1 { + // End condition: ends on own side in empty pit + let captured = board.pits[player][i] + board.pits[player ^ 1][7 - i]; + board.pits[player][i] = 0; + board.pits[player ^ 1][7 - i] = 0; + board.pits[player][0] += captured; + } + } + + board.to_move = !board.to_move; + Some(board) + } + + fn get_winner(board: &Board) -> Option { + if board.pits[0][1..].iter().sum::() == 0 || board.pits[1][1..].iter().sum::() == 0 + { + let to_move_total = board.pits[board.to_move as usize].iter().sum::(); + Some(if to_move_total == 24 { + minimax::Winner::Draw + } else if to_move_total > 24 { + minimax::Winner::PlayerToMove + } else { + minimax::Winner::PlayerJustMoved + }) + } else { + None + } + } + + fn zobrist_hash(board: &Board) -> u64 { + let mut hash = board.to_move as u64; + for i in 0..7 { + hash ^= HASHES[i].wrapping_mul(board.pits[0][i] as u64); + hash ^= HASHES[i + 7].wrapping_mul(board.pits[1][i] as u64); + } + hash + } + + fn null_move(_: &Board) -> Option { + Some(0) + } + + fn notation(_: &Board, m: Move) -> Option { + Some(if m == 0 { "skipped".to_owned() } else { format!("pit {}", m) }) + } + + fn table_index(m: Move) -> u16 { + m as u16 + } + fn max_table_index() -> u16 { + 6 + } +} + +impl fmt::Display for Board { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "+-----------------------+\n| |")?; + for pit in &self.pits[1][1..] 
{ + write!(f, "{:>2}|", pit)?; + } + write!(f, " |\n+{:>2}+--+--+--+--+--+--+{:>2}+\n| ", self.pits[1][0], self.pits[0][0])?; + for pit in self.pits[0][1..].iter().rev() { + write!(f, "|{:>2}", pit)?; + } + write!(f, "| |\n+-----------------------+\n") + } +} + +#[derive(Default)] +struct Evaluator; + +impl minimax::Evaluator for Evaluator { + type G = Mancala; + fn evaluate(&self, board: &Board) -> minimax::Evaluation { + board.pits[board.to_move as usize].iter().sum::() as minimax::Evaluation - 24 + } +} + +fn main() { + let mut board = Board::default(); + let opts = minimax::IterativeOptions::new().verbose(); + let mut strategy = minimax::IterativeSearch::new(Evaluator::default(), opts); + strategy.set_timeout(std::time::Duration::from_secs(1)); + while Mancala::get_winner(&board).is_none() { + println!("{}", board); + match strategy.choose_move(&board) { + Some(m) => board = Mancala::apply(&mut board, m).unwrap(), + None => break, + } + } + println!("Winner player {:?}", board.to_move as u8 + 1); +} + +const HASHES: [u64; 14] = [ + 0x73399349585d196e, + 0xe512dc15f0da3dd1, + 0x4fbc1b81c6197db2, + 0x16b5034810111a66, + 0xa9a9d0183e33c311, + 0xbb9d7bdea0dad2d6, + 0x089d9205c11ca5c7, + 0x18d9db91aa689617, + 0x1336123120681e34, + 0xc902e6c0bd6ef6bf, + 0x16985ba0916238c1, + 0x6144c3f2ab9f6dc4, + 0xf24b4842de919a02, + 0xdd6dd35ba0c150a1, +]; diff --git a/examples/ttt.rs b/examples/ttt.rs index 02f74e3..eaf7e3e 100644 --- a/examples/ttt.rs +++ b/examples/ttt.rs @@ -9,7 +9,6 @@ extern crate minimax; use std::default::Default; use std::fmt::{Display, Formatter, Result}; -use std::convert::From; #[derive(Copy, Clone, PartialEq, Eq)] #[repr(u8)] @@ -19,6 +18,16 @@ pub enum Square { O, } +impl Square { + fn invert(&self) -> Self { + match *self { + Square::Empty => Square::Empty, + Square::X => Square::O, + Square::O => Square::X, + } + } +} + impl Default for Square { fn default() -> Square { Square::Empty @@ -27,63 +36,41 @@ impl Default for Square { impl Display for 
Square { fn fmt(&self, f: &mut Formatter) -> Result { - write!(f, - "{}", - match *self { - Square::Empty => ' ', - Square::X => 'X', - Square::O => 'O', - }) - } -} - -impl From for Square { - fn from(p: minimax::Player) -> Square { - match p { - minimax::Player::Computer => Square::X, - minimax::Player::Opponent => Square::O, - } - } -} - -impl From for minimax::Player { - fn from(s: Square) -> minimax::Player { - match s { - Square::X => minimax::Player::Computer, - Square::O => minimax::Player::Opponent, - _ => panic!("From::from(Square::Empty))"), - } + write!( + f, + "{}", + match *self { + Square::Empty => ' ', + Square::X => 'X', + Square::O => 'O', + } + ) } } #[derive(Clone, PartialEq, Eq)] pub struct Board { squares: [Square; 9], + to_move: Square, +} + +impl Board { + fn just_moved(&self) -> Square { + self.to_move.invert() + } } impl Default for Board { fn default() -> Board { - Board { squares: [Square::default(); 9] } + Board { squares: [Square::default(); 9], to_move: Square::X } } } impl Display for Board { fn fmt(&self, f: &mut Formatter) -> Result { - try!(writeln!(f, - "{} | {} | {}", - self.squares[0], - self.squares[1], - self.squares[2])); - try!(writeln!(f, - "{} | {} | {}", - self.squares[3], - self.squares[4], - self.squares[5])); - try!(writeln!(f, - "{} | {} | {}", - self.squares[6], - self.squares[7], - self.squares[8])); + writeln!(f, "{} | {} | {}", self.squares[0], self.squares[1], self.squares[2])?; + writeln!(f, "{} | {} | {}", self.squares[3], self.squares[4], self.squares[5])?; + writeln!(f, "{} | {} | {}", self.squares[6], self.squares[7], self.squares[8])?; Ok(()) } } @@ -94,56 +81,67 @@ impl minimax::Game for Game { type S = Board; type M = Place; - fn generate_moves(b: &Board, p: minimax::Player, ms: &mut [Option]) -> usize { - let mut j = 0; + fn generate_moves(b: &Board, ms: &mut Vec) { for i in 0..b.squares.len() { if b.squares[i] == Square::Empty { - ms[j] = Some(Place { - i: i as u8, - s: From::from(p), - }); - j += 1; + 
ms.push(Place { i: i as u8 }); } } - ms[j] = None; - j } fn get_winner(b: &Board) -> Option { + // A player can only cause themselves to win on their turn, so only check for that. + // horizontal wins - if b.squares[0] != Square::Empty && b.squares[0] == b.squares[1] && - b.squares[1] == b.squares[2] { - return Some(minimax::Winner::Competitor(From::from(b.squares[0]))); + if b.squares[0] == b.just_moved() + && b.squares[0] == b.squares[1] + && b.squares[1] == b.squares[2] + { + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[3] != Square::Empty && b.squares[3] == b.squares[4] && - b.squares[4] == b.squares[5] { - return Some(minimax::Winner::Competitor(From::from(b.squares[3]))); + if b.squares[3] == b.just_moved() + && b.squares[3] == b.squares[4] + && b.squares[4] == b.squares[5] + { + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[6] != Square::Empty && b.squares[6] == b.squares[7] && - b.squares[7] == b.squares[8] { - return Some(minimax::Winner::Competitor(From::from(b.squares[6]))); + if b.squares[6] == b.just_moved() + && b.squares[6] == b.squares[7] + && b.squares[7] == b.squares[8] + { + return Some(minimax::Winner::PlayerJustMoved); } // vertical wins - if b.squares[0] != Square::Empty && b.squares[0] == b.squares[3] && - b.squares[3] == b.squares[6] { - return Some(minimax::Winner::Competitor(From::from(b.squares[0]))); + if b.squares[0] == b.just_moved() + && b.squares[0] == b.squares[3] + && b.squares[3] == b.squares[6] + { + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[1] != Square::Empty && b.squares[1] == b.squares[4] && - b.squares[4] == b.squares[7] { - return Some(minimax::Winner::Competitor(From::from(b.squares[1]))); + if b.squares[1] == b.just_moved() + && b.squares[1] == b.squares[4] + && b.squares[4] == b.squares[7] + { + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[2] != Square::Empty && b.squares[2] == b.squares[5] && - b.squares[5] == b.squares[8] { - return 
Some(minimax::Winner::Competitor(From::from(b.squares[2]))); + if b.squares[2] == b.just_moved() + && b.squares[2] == b.squares[5] + && b.squares[5] == b.squares[8] + { + return Some(minimax::Winner::PlayerJustMoved); } // diagonal wins - if b.squares[0] != Square::Empty && b.squares[0] == b.squares[4] && - b.squares[4] == b.squares[8] { - return Some(minimax::Winner::Competitor(From::from(b.squares[0]))); + if b.squares[0] == b.just_moved() + && b.squares[0] == b.squares[4] + && b.squares[4] == b.squares[8] + { + return Some(minimax::Winner::PlayerJustMoved); } - if b.squares[2] != Square::Empty && b.squares[2] == b.squares[4] && - b.squares[4] == b.squares[6] { - return Some(minimax::Winner::Competitor(From::from(b.squares[2]))); + if b.squares[2] == b.just_moved() + && b.squares[2] == b.squares[4] + && b.squares[4] == b.squares[6] + { + return Some(minimax::Winner::PlayerJustMoved); } // draws if b.squares.iter().all(|s| *s != Square::Empty) { @@ -153,43 +151,41 @@ impl minimax::Game for Game { None } } + + fn apply(b: &mut Board, m: Place) -> Option { + b.squares[m.i as usize] = b.to_move; + b.to_move = b.to_move.invert(); + None + } + fn undo(b: &mut Board, m: Place) { + b.squares[m.i as usize] = Square::Empty; + b.to_move = b.to_move.invert(); + } } #[derive(Copy, Clone, PartialEq, Eq)] pub struct Place { i: u8, - s: Square, } impl Display for Place { fn fmt(&self, f: &mut Formatter) -> Result { - write!(f, "{}@{}", self.s, self.i) + write!(f, "@{}", self.i) } } -impl minimax::Move for Place { - type G = Game; - fn apply(&self, b: &mut Board) { - b.squares[self.i as usize] = self.s; - } - fn undo(&self, b: &mut Board) { - b.squares[self.i as usize] = Square::Empty; +pub struct Evaluator; + +impl Default for Evaluator { + fn default() -> Self { + Self {} } } -pub struct Evaluator; - impl minimax::Evaluator for Evaluator { type G = Game; // adapted from http://www.cs.olemiss.edu/~dwilkins/CSCI531/tic.c - fn evaluate(b: &Board, mw: Option) -> minimax::Evaluation 
{ - match mw { - Some(minimax::Winner::Competitor(wp)) => match wp { - minimax::Player::Computer => return minimax::Evaluation::Best, - minimax::Player::Opponent => return minimax::Evaluation::Worst, - }, - _ => {} - } + fn evaluate(&self, b: &Board) -> minimax::Evaluation { let mut score = 0; // 3rd: check for doubles @@ -231,27 +227,29 @@ impl minimax::Evaluator for Evaluator { if b.squares[4] == Square::O { score -= 5; } - minimax::Evaluation::Score(score) + if b.to_move == Square::X { + score + } else { + -score + } } } fn main() { - use minimax::{Game, Move, Strategy}; - use minimax::strategies::negamax::{Negamax, Options}; + use minimax::strategies::negamax::Negamax; + use minimax::{Game, Strategy}; let mut b = Board::default(); - let mut strategies = vec![ - (minimax::Player::Computer, Negamax::::new(Options { max_depth: 10 })), - (minimax::Player::Opponent, Negamax::::new(Options { max_depth: 10 })), - ]; + let mut strategies = + vec![Negamax::new(Evaluator::default(), 10), Negamax::new(Evaluator::default(), 10)]; let mut s = 0; while self::Game::get_winner(&b).is_none() { println!("{}", b); - let (p, ref mut strategy) = strategies[s]; - match strategy.choose_move(&mut b, p) { - Some(m) => m.apply(&mut b), + let ref mut strategy = strategies[s]; + match strategy.choose_move(&mut b) { + Some(m) => self::Game::apply(&mut b, m), None => break, - } + }; s = 1 - s; } println!("{}", b); diff --git a/src/interface.rs b/src/interface.rs index d9b8661..73b9894 100644 --- a/src/interface.rs +++ b/src/interface.rs @@ -1,140 +1,171 @@ //! The common structures and traits. -use std::ops; +/// An assessment of a game state from the perspective of the player whose turn it is to play. +/// Higher values mean a more favorable state. +/// A draw is defined as a score of zero. +pub type Evaluation = i16; -/// A competitor within a game. -/// -/// For simplicity, only two players are supported. Their values correspond to -/// the "color" parameter in Negamax. 
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[repr(i8)] -pub enum Player { - Computer = 1, - Opponent = -1, -} - -/// Negating a player results in the opposite one. -impl ops::Neg for Player { - type Output = Player; - #[inline] - fn neg(self) -> Player { - match self { - Player::Computer => Player::Opponent, - Player::Opponent => Player::Computer, - } - } -} +// These definitions ensure that they negate to each other, but it leaves +// i16::MIN as a valid value less than WORST_EVAL. Don't use this value, and +// any Strategy will panic when it tries to negate it. -/// An assessment of a game state from a particular player's perspective. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub enum Evaluation { - /// An absolutely disastrous outcome, e.g. a loss. - Worst, - /// An outcome with some score. Higher values mean a more favorable state. - Score(i64), - /// An absolutely wonderful outcome, e.g. a win. - Best, -} - -/// Negating an evaluation results in the corresponding one from the other -/// player's persective. -impl ops::Neg for Evaluation { - type Output = Evaluation; - #[inline] - fn neg(self) -> Evaluation { - match self { - Evaluation::Worst => Evaluation::Best, - Evaluation::Score(s) => Evaluation::Score(-s), - Evaluation::Best => Evaluation::Worst, - } - } -} - -/// Multiplying a player and an evaluation negates the latter iff the former -/// is `Opponent`. -impl ops::Mul for Player { - type Output = Evaluation; - #[inline] - fn mul(self, e: Evaluation) -> Evaluation { - match self { - Player::Computer => e, - Player::Opponent => -e, - } - } -} +/// An absolutely wonderful outcome, e.g. a win. +pub const BEST_EVAL: Evaluation = i16::MAX; +/// An absolutely disastrous outcome, e.g. a loss. +pub const WORST_EVAL: Evaluation = -BEST_EVAL; /// Evaluates a game's positions. -/// -/// The methods are defined recursively, so that implementing one is sufficient. 
pub trait Evaluator { /// The type of game that can be evaluated. type G: Game; - /// Evaluate the state from the persective of `Player::Computer`. - #[inline] - fn evaluate(s: &::S, mw: Option) -> Evaluation { - Self::evaluate_for(s, mw, Player::Computer) - } + /// Evaluate the non-terminal state from the perspective of the player to + /// move next. + fn evaluate(&self, s: &::S) -> Evaluation; - /// Evaluate the state from the given player's persective. - #[inline] - fn evaluate_for(s: &::S, mw: Option, p: Player) -> Evaluation { - p * Self::evaluate(s, mw) + /// Optional interface to support strategies using quiescence search. + /// + /// A "noisy" move is a threatening move that requires a response. + /// + /// The term comes from chess, where capturing a piece is considered a noisy + /// move. Capturing a piece is often the first move out of an exchange of + /// captures. Evaluating the board state after only the first capture can + /// give a misleadingly high score. The solution is to continue the search + /// among only noisy moves and find the score once the board state settles. + /// + /// Noisy moves are not inherent parts of the rules, but engine decisions, + /// so they are implemented in Evaluator instead of Game. + fn generate_noisy_moves( + &self, _state: &::S, _moves: &mut Vec<::M>, + ) { + // When unimplemented, there are no noisy moves and search terminates + // immediately. } -} -/// Defines how a move affects the game state. -/// -/// A move is able to change initial `Game` state, as well as revert the state. -/// This allows the game tree to be searched with a constant amount of space. -pub trait Move { - /// The type of game that the move affects. - type G: Game; - /// Change the state of `S` so that the move is applied. - #[inline] - fn apply(&self, &mut ::S); - /// Revert the state of `S` so that the move is undone. 
- #[inline] - fn undo(&self, &mut ::S); + // TODO reorder moves by assigning value to each state and combining with countermoves table etc. } /// The result of playing a game until it finishes. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Winner { - /// A player won. - Competitor(Player), + /// The player who made the last move won. + PlayerJustMoved, /// Nobody won. Draw, + /// The player who made the last move lost. + /// + /// This is uncommon, and many games (chess, checkers, tic-tac-toe, etc) + /// do not have this possibility. + PlayerToMove, +} + +impl Winner { + /// Canonical evaluations for end states. + pub fn evaluate(&self) -> Evaluation { + match *self { + Winner::PlayerJustMoved => WORST_EVAL, + Winner::PlayerToMove => BEST_EVAL, + Winner::Draw => 0, + } + } } /// Defines the rules for a two-player, perfect-knowledge game. /// /// A game ties together types for the state and moves, generates the possible /// moves from a particular state, and determines whether a state is terminal. -pub trait Game : Sized { +/// +/// This is meant to be defined on an empty newtype so that a game engine can +/// be implemented in a separate crate without having to know about these +/// `minimax` traits. +pub trait Game: Sized { /// The type of the game state. type S; /// The type of game moves. - type M: Move; + type M: Copy; - /// Generate moves for a player at the given state. After finishing, the - /// next entry in the slice should be set to `None` to indicate the end. - /// Returns the number of moves generated. + /// Generate moves at the given state. + fn generate_moves(state: &Self::S, moves: &mut Vec); + + /// Apply a move to get a new state. + /// + /// If the method returns a new state, the caller should use that. If the + /// method returns None, the caller should use the original. + /// This enables two different implementation strategies: + /// + /// 1) Games with large state that want to update in place. 
+ /// ``` + /// struct BigBoard([u8; 4096]); + /// struct BigMove(u16); + /// fn apply(state: &mut BigBoard, m: BigMove) -> Option { + /// state.0[m.0 as usize] += 1; + /// None + /// } + /// fn undo(state: &mut BigBoard, m: BigMove) { + /// state.0[m.0 as usize] -= 1; + /// } + /// ``` /// - /// Currently, there's a deficiency that all strategies assume that at most - /// 100 moves may be generated for any position, which allows the underlying - /// memory for the slice to be a stack-allocated array. One stable, this - /// trait will be extended with an associated constant to specify the - /// maximum number of moves. - #[inline] - fn generate_moves(&Self::S, Player, &mut [Option]) -> usize; - - /// Returns `Some(Competitor(winning_player))` if there's a winner, + /// 2) Games with small state that don't want to implement undo. + /// ``` + /// struct SmallBoard(u64); + /// struct SmallMove(u8); + /// fn apply(state: &mut SmallBoard, m: SmallMove) -> Option { + /// Some(SmallBoard(state.0 | (1<<m.0))) + /// } + /// ``` + fn apply(state: &mut Self::S, m: Self::M) -> Option<Self::S>; + + /// Undo mutation done in apply, if any. + fn undo(_state: &mut Self::S, _m: Self::M) {} + + /// Returns `Some(PlayerJustMoved)` or `Some(PlayerToMove)` if there's a winner, /// `Some(Draw)` if the state is terminal without a winner, and `None` if /// the state is non-terminal. - fn get_winner(&Self::S) -> Option; + fn get_winner(state: &Self::S) -> Option; + + /// Hash of the game state. + /// Expected to be pre-calculated and cheaply updated with each apply. + fn zobrist_hash(_state: &Self::S) -> u64 { + unimplemented!("game has not implemented zobrist hash"); + } + + /// Optional method to return a move that does not change the board state. + /// This does not need to be a legal move from this position, but it is + /// used in some strategies to reject a position early if even passing gives + /// a good position for the opponent. + fn null_move(_state: &Self::S) -> Option { + None + } + + /// Return a human-readable notation for this move in this game state. 
+ fn notation(_state: &Self::S, _move: Self::M) -> Option { + None + } + /// Return a small index for this move for position-independent tables. + fn table_index(_: Self::M) -> u16 { + 0 + } + /// Maximum index value. + fn max_table_index() -> u16 { + 0 + } } -/// Defines a method of choosing a move for either player in a any game. +/// Defines a method of choosing a move for the current player. pub trait Strategy { - fn choose_move(&mut self, &G::S, Player) -> Option; + fn choose_move(&mut self, state: &G::S) -> Option; + + /// For strategies that can ponder indefinitely, set the timeout. + /// This can be changed between calls to choose_move. + fn set_timeout(&mut self, _timeout: std::time::Duration) {} + + /// Set the maximum depth to evaluate (instead of the timeout). + /// This can be changed between calls to choose_move. + fn set_max_depth(&mut self, _depth: u8) {} + + /// From the last choose_move call, return the principal variation, + /// i.e. the best sequence of moves for both players. + fn principal_variation(&self) -> Vec { + Vec::new() + } } diff --git a/src/lib.rs b/src/lib.rs index 70ec9b9..bc83adf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,81 @@ -extern crate rand; +//! The `minimax` library provides interfaces for defining two-player +//! perfect-knowledge games, and strategies for choosing moves. +//! +//! Any game can be defined by implementing the Game trait, in terms of a game +//! state type and a move type. +//! ``` +//! use minimax::Strategy; +//! +//! // Stateless rules object. +//! struct TugOfWar; +//! // State of the game. +//! #[derive(Clone)] +//! struct War(i8); +//! // A move that a player can make. +//! #[derive(Copy, Clone, Debug, Eq, PartialEq)] +//! struct Tug(i8); +//! +//! impl minimax::Game for TugOfWar { +//! type S = War; +//! type M = Tug; +//! +//! fn generate_moves(s: &War, moves: &mut Vec) { +//! moves.push(Tug(-1)); +//! moves.push(Tug(1)); +//! } +//! +//! fn get_winner(state: &War) -> Option { +//! 
if state.0 > 9 { +//! Some(if state.0 % 2 == 0 { +//! minimax::Winner::PlayerJustMoved +//! } else { +//! minimax::Winner::PlayerToMove +//! }) +//! } else if state.0 < -9 { +//! Some(if state.0 % 2 == 0 { +//! minimax::Winner::PlayerToMove +//! } else { +//! minimax::Winner::PlayerJustMoved +//! }) +//! } else { +//! None +//! } +//! } +//! +//! fn apply(state: &mut War, tug: Tug) -> Option { +//! Some(War(state.0 + tug.0)) +//! } +//! } +//! +//! // To run the search we need an evaluator. +//! struct Eval; +//! impl minimax::Evaluator for Eval { +//! type G = TugOfWar; +//! fn evaluate(&self, state: &War) -> minimax::Evaluation { +//! if state.0 % 2 == 0 { +//! state.0 as minimax::Evaluation +//! } else { +//! -state.0 as minimax::Evaluation +//! } +//! } +//! } +//! +//! // Now we can use a simple Strategy to find a move from the initial state. +//! let start = War(0); +//! let mut strategy = minimax::Negamax::new(Eval{}, 3); +//! let best_move = strategy.choose_move(&start).unwrap(); +//! ``` pub mod interface; pub mod strategies; pub mod util; -pub use interface::{Evaluation, Evaluator, Game, Move, Player, Strategy, Winner}; -pub use strategies::negamax::{Negamax, Options}; +pub use interface::*; +pub use strategies::iterative::{IterativeOptions, IterativeSearch, Replacement}; +#[cfg(not(target_arch = "wasm32"))] +pub use strategies::mcts::{MCTSOptions, MonteCarloTreeSearch, RolloutPolicy}; +pub use strategies::negamax::Negamax; +pub use strategies::random::Random; +#[cfg(not(target_arch = "wasm32"))] +pub use strategies::ybw::{ParallelOptions, ParallelSearch}; +pub use util::perft; diff --git a/src/strategies/common.rs b/src/strategies/common.rs new file mode 100644 index 0000000..f5c1499 --- /dev/null +++ b/src/strategies/common.rs @@ -0,0 +1,127 @@ +use super::super::interface::*; + +use rand::Rng; + +// For values near winning and losing values, push them slightly closer to zero. +// A win in 3 moves (BEST-3) will be chosen over a win in 5 moves (BEST-5). 
+// A loss in 5 moves (WORST+5) will be chosen over a loss in 3 moves (WORST+3). +pub(super) fn clamp_value(value: Evaluation) -> Evaluation { + if value > BEST_EVAL - 100 { + value - 1 + } else if value < WORST_EVAL + 100 { + value + 1 + } else { + value + } +} + +// Undo any value clamping. +pub(super) fn unclamp_value(value: Evaluation) -> Evaluation { + if value > BEST_EVAL - 100 { + BEST_EVAL + } else if value < WORST_EVAL + 100 { + WORST_EVAL + } else { + value + } +} + +// Return a unique id for humans for this move. +pub(super) fn move_id(s: &::S, m: Option<::M>) -> String { + if let Some(mov) = m { + G::notation(s, mov).unwrap_or("no notation impl".to_string()) + } else { + "none".to_string() + } +} + +pub(super) fn pv_string(path: &[::M], state: &::S) -> String +where + ::M: Copy, + ::S: Clone, +{ + let mut state = state.clone(); + let mut out = String::new(); + for (i, &m) in (0..).zip(path.iter()) { + if i > 0 { + out.push_str("; "); + } + out.push_str(move_id::(&state, Some(m)).as_str()); + if let Some(new_state) = G::apply(&mut state, m) { + state = new_state; + } + } + out +} + +pub(super) fn move_to_front(m: M, moves: &mut [M]) { + for i in 0..moves.len() { + if moves[i] == m { + moves[0..i + 1].rotate_right(1); + break; + } + } +} + +// This exists to be wrapped in a mutex, because it didn't work when I tried a tuple. 
+pub(super) struct ValueMove { + pub(super) value: Evaluation, + pub(super) m: M, +} + +impl ValueMove { + pub(super) fn new(value: Evaluation, m: M) -> Self { + Self { value, m } + } + + #[cfg(not(target_arch = "wasm32"))] + pub(super) fn max(&mut self, value: Evaluation, m: M) { + if value > self.value { + self.value = value; + self.m = m; + } + } + + #[cfg(not(target_arch = "wasm32"))] + pub(super) fn into_inner(self) -> (Evaluation, M) { + (self.value, self.m) + } +} + +static PRIMES: [usize; 16] = [ + 14323, 18713, 19463, 30553, 33469, 45343, 50221, 51991, 53201, 56923, 64891, 72763, 74471, + 81647, 92581, 94693, +]; + +// Find and return the highest scoring element of the set. +// If multiple elements have the highest score, select one randomly. +// Constraints: +// - Don't call the scoring function more than once per element. +// - Select one uniformly, so that a run of high scores doesn't +// bias towards the one that scans first. +// - Don't shuffle the input or allocate a new array for shuffling. +// - Optimized for sets with <10k values. +pub(super) fn random_best f32>(set: &[T], score_fn: F) -> Option<&T> { + // To make the choice more uniformly random among the best moves, + // start at a random offset and stride by a random amount. + // The stride must be coprime with n, so pick from a set of 5 digit primes. + + let n = set.len(); + // Combine both random numbers into a single rng call. 
+ let r = rand::rng().random_range(0..n * PRIMES.len()); + let mut i = r / PRIMES.len(); + let stride = PRIMES[r % PRIMES.len()]; + + let mut best_score = f32::NEG_INFINITY; + let mut best = None; + for _ in 0..n { + let score = score_fn(&set[i]); + debug_assert!(!score.is_nan()); + if score > best_score { + best_score = score; + best = Some(&set[i]); + } + i = (i + stride) % n; + } + best +} diff --git a/src/strategies/iterative.rs b/src/strategies/iterative.rs new file mode 100644 index 0000000..86189ca --- /dev/null +++ b/src/strategies/iterative.rs @@ -0,0 +1,748 @@ +//! An implementation of iterative deepening evaluation. +//! +//! Search and evaluate at depth 1, then start over at depth 2, then depth 3, +//! etc. Can keep going until a maximum depth or maximum time or either. Uses +//! a transposition table to reuse information from previous iterations. + +use super::super::interface::*; +use super::super::util::*; +use super::common::*; +#[cfg(not(target_arch = "wasm32"))] +use super::sync_util::timeout_signal; +use super::table::*; + +use instant::Instant; +use rand::prelude::SliceRandom; +use std::cmp::max; +#[cfg(not(target_arch = "wasm32"))] +use std::sync::atomic::{AtomicBool, Ordering}; +#[cfg(not(target_arch = "wasm32"))] +use std::sync::Arc; +use std::time::Duration; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +/// Strategies for when to overwrite entries in the transposition table. +pub enum Replacement { + Always, + DepthPreferred, + TwoTier, + // TODO: Bucket(size) +} + +struct TranspositionTable { + table: Vec>, + mask: usize, + // Incremented for each iterative deepening run. + // Values from old generations are always overwritten. 
+ generation: u8, + strategy: Replacement, +} + +impl TranspositionTable { + fn new(table_byte_size: usize, strategy: Replacement) -> Self { + let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); + let mask = if strategy == Replacement::TwoTier { (size - 1) & !1 } else { size - 1 }; + let mut table = Vec::with_capacity(size); + for _ in 0..size { + table.push(Entry:: { + high_hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + }); + } + Self { table, mask, generation: 0, strategy } + } +} + +impl Table for TranspositionTable { + fn lookup(&self, hash: u64) -> Option> { + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if high_bits(hash) == entry.high_hash { + Some(*entry) + } else if self.strategy == Replacement::TwoTier { + let entry = &self.table[index + 1]; + if high_bits(hash) == entry.high_hash { + Some(*entry) + } else { + None + } + } else { + None + } + } + + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + let dest = match self.strategy { + Replacement::Always => Some((hash as usize) & self.mask), + Replacement::DepthPreferred => { + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if entry.generation != self.generation || entry.depth <= depth { + Some(index) + } else { + None + } + } + Replacement::TwoTier => { + // index points to the first of a pair of entries, the depth-preferred entry and the always-replace entry. 
+ let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if entry.generation != self.generation || entry.depth <= depth { + Some(index) + } else { + Some(index + 1) + } + } + }; + if let Some(index) = dest { + self.table[index] = Entry { + high_hash: high_bits(hash), + value, + depth, + flag, + generation: self.generation, + best_move: Some(best_move), + } + } + } + + fn advance_generation(&mut self) { + self.generation = self.generation.wrapping_add(1); + } +} + +/// Options to use for the iterative search engines. +#[derive(Clone, Copy)] +pub struct IterativeOptions { + pub table_byte_size: usize, + pub(super) strategy: Replacement, + pub(super) null_window_search: bool, + pub(super) null_move_depth: Option, + pub(super) singular_extension: bool, + pub(super) aspiration_window: Option, + pub(super) mtdf: bool, + pub(super) step_increment: u8, + pub(super) max_quiescence_depth: u8, + pub(super) min_reorder_moves_depth: u8, + pub(super) countermove_table: bool, + pub(super) countermove_history_table: bool, + pub verbose: bool, +} + +impl IterativeOptions { + pub fn new() -> Self { + IterativeOptions { + table_byte_size: 1 << 20, + strategy: Replacement::TwoTier, + null_window_search: true, + null_move_depth: None, + singular_extension: false, + aspiration_window: None, + mtdf: false, + step_increment: 1, + max_quiescence_depth: 0, + min_reorder_moves_depth: u8::MAX, + countermove_table: false, + countermove_history_table: false, + verbose: false, + } + } +} + +impl Default for IterativeOptions { + fn default() -> Self { + Self::new() + } +} + +impl IterativeOptions { + /// Approximately how large the transposition table should be in memory. + pub fn with_table_byte_size(mut self, size: usize) -> Self { + self.table_byte_size = size; + self + } + + /// What rules to use when choosing whether to overwrite the current value + /// in the transposition table. 
+ pub fn with_replacement_strategy(mut self, strategy: Replacement) -> Self { + self.strategy = strategy; + self + } + + /// Whether to add null-window searches to try to prune branches that are + /// probably worse than those already found. Also known as principal + /// variation search. + pub fn with_null_window_search(mut self, null: bool) -> Self { + self.null_window_search = null; + self + } + + /// Whether to attempt to cut off early by seeing if each node is amazing + /// even after passing the turn to the opponent. Null move search explores + /// the tree at a depth reduced by this amount. + pub fn with_null_move_depth(mut self, depth_reduction: u8) -> Self { + self.null_move_depth = Some(depth_reduction); + self + } + + /// Whether to extend a branch of the search (by 1) if there is only one + /// move (or only one reasonable move). + pub fn with_singular_extension(mut self) -> Self { + self.singular_extension = true; + self + } + + /// Whether to search first in a narrow window around the previous root + /// value on each iteration. + pub fn with_aspiration_window(mut self, window: Evaluation) -> Self { + self.aspiration_window = Some(window); + self + } + + /// Whether to search for the correct value in each iteration using only + /// null-window "Tests", with the + /// [MTD(f)](https://en.wikipedia.org/wiki/MTD%28f%29) algorithm. + /// Can be more efficient if the evaluation function is coarse grained. + pub fn with_mtdf(mut self) -> Self { + self.mtdf = true; + self + } + + /// Increment the depth by two between iterations. + pub fn with_double_step_increment(mut self) -> Self { + self.step_increment = 2; + self + } + + /// Enable [quiescence + /// search](https://en.wikipedia.org/wiki/Quiescence_search) at the leaves + /// of the search tree. The Evaluator must implement `generate_noisy_moves` + /// for the search to know when the state has become "quiet". 
+ pub fn with_quiescence_search_depth(mut self, depth: u8) -> Self { + self.max_quiescence_depth = depth; + self + } + + /// Enable the Evaluator's move reordering after generating moves for all + /// nodes at this depth or higher. Reordering can be an expensive + /// operation, but it could cut off a lot of nodes if done well high in + /// the search tree. + pub fn with_min_reorder_moves_depth(mut self, depth: u8) -> Self { + self.min_reorder_moves_depth = depth; + self + } + + /// Enable the countermove table, which reorders to the front moves that + /// have worked to counter the previous move in other branches. + pub fn with_countermoves(mut self) -> Self { + self.countermove_table = true; + self + } + + /// Enable the countermove history table. It keeps a counter for moves + /// that have caused beta cutoffs in other branches, and reorders moves + /// based on this counter. + pub fn with_countermove_history(mut self) -> Self { + self.countermove_history_table = true; + self + } + + /// Enable verbose print statements of the ongoing performance of the search. 
+ pub fn verbose(mut self) -> Self { + self.verbose = true; + self + } +} + +#[derive(Default)] +pub(crate) struct Stats { + pub(crate) nodes_explored: u64, + pub(crate) total_generate_move_calls: u64, + pub(crate) total_generated_moves: u64, +} + +impl Stats { + pub(crate) fn reset(&mut self) { + self.nodes_explored = 0; + self.total_generate_move_calls = 0; + self.total_generated_moves = 0; + } + pub(crate) fn explore_node(&mut self) { + self.nodes_explored += 1; + } + + pub(crate) fn generate_moves(&mut self, num_moves: usize) { + self.total_generate_move_calls += 1; + self.total_generated_moves += num_moves as u64; + } + + #[cfg(not(target_arch = "wasm32"))] + pub(crate) fn add(&mut self, other: &Self) { + self.nodes_explored += other.nodes_explored; + self.total_generate_move_calls += other.total_generate_move_calls; + self.total_generated_moves += other.total_generated_moves; + } +} + +pub(super) struct Negamaxer { + #[cfg(not(target_arch = "wasm32"))] + timeout: Arc, + #[cfg(target_arch = "wasm32")] + deadline: Instant, + #[cfg(target_arch = "wasm32")] + timeout_counter: u32, + pub(super) table: T, + pub(super) countermoves: CounterMoves, + move_pool: MovePool<::M>, + eval: E, + + opts: IterativeOptions, + pub(crate) stats: Stats, +} + +impl::M>> Negamaxer +where + ::M: Copy + Eq, +{ + pub(super) fn new(table: T, eval: E, opts: IterativeOptions) -> Self { + Self { + #[cfg(not(target_arch = "wasm32"))] + timeout: Arc::new(AtomicBool::new(false)), + #[cfg(target_arch = "wasm32")] + deadline: Instant::now(), + #[cfg(target_arch = "wasm32")] + timeout_counter: 1000, + table, + countermoves: CounterMoves::new(opts.countermove_table, opts.countermove_history_table), + eval, + move_pool: MovePool::default(), + opts, + stats: Stats::default(), + } + } + + #[cfg(not(target_arch = "wasm32"))] + pub(super) fn set_timeout(&mut self, timeout: Arc) { + self.timeout = timeout; + } + + #[cfg(target_arch = "wasm32")] + fn reset_timeout(&mut self, duration: Duration) { + 
self.timeout_counter = if duration == Duration::new(0, 0) { + // Too high counter that never hits the maximum. + 1000 + } else { + 0 + }; + self.deadline = Instant::now() + duration; + } + #[cfg(not(target_arch = "wasm32"))] + fn reset_timeout(&mut self, duration: Duration) { + self.set_timeout(if duration == Duration::new(0, 0) { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(duration) + }); + } + + #[cfg(target_arch = "wasm32")] + fn timeout_check(&mut self) -> bool { + self.timeout_counter += 1; + if self.timeout_counter != 100 { + return false; + } + self.timeout_counter = 0; + Instant::now() >= self.deadline + } + #[cfg(not(target_arch = "wasm32"))] + fn timeout_check(&mut self) -> bool { + self.timeout.load(Ordering::Relaxed) + } + + fn null_move_check( + &mut self, s: &mut ::S, depth: u8, beta: Evaluation, + ) -> Option { + if let (Some(depth_reduction), Some(null_move)) = + (self.opts.null_move_depth, E::G::null_move(s)) + { + // Default to a minimum of depth=1 after null moving. + if depth > depth_reduction && + // If the position already seems pretty awesome. + self.eval.evaluate(s) >= beta + { + // If we just pass and let the opponent play this position (at reduced depth), + let mut nulled = AppliedMove::::new(s, null_move); + let value = + -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; + // is the result still so good that we shouldn't bother with a full search? + if value >= beta { + return Some(value); + } + } + } + // If we didn't check, return a low value that won't trigger beta cutoff. + Some(WORST_EVAL) + } + + // Negamax only among noisy moves. 
+ fn noisy_negamax( + &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, + ) -> Option { + if self.timeout_check() { + return None; + } + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + if depth == 0 { + return Some(self.eval.evaluate(s)); + } + + let mut moves = self.move_pool.alloc(); + self.eval.generate_noisy_moves(s, &mut moves); + if moves.is_empty() { + self.move_pool.free(moves); + return Some(self.eval.evaluate(s)); + } + + let mut best = WORST_EVAL; + for m in moves.iter() { + let mut new = AppliedMove::::new(s, *m); + let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; + best = max(best, value); + alpha = max(alpha, value); + if alpha >= beta { + break; + } + } + self.move_pool.free(moves); + Some(best) + } + + // Recursively compute negamax on the game state. Returns None if it hits the timeout. + pub(super) fn negamax( + &mut self, s: &mut ::S, prev_move: Option<::M>, mut depth: u8, + mut alpha: Evaluation, mut beta: Evaluation, + ) -> Option { + if self.timeout_check() { + return None; + } + + self.stats.explore_node(); + + if depth == 0 { + // Evaluate quiescence search on leaf nodes. + // Will just return the node's evaluation if quiescence search is disabled. + return self.noisy_negamax(s, self.opts.max_quiescence_depth, alpha, beta); + } + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + + let alpha_orig = alpha; + let hash = E::G::zobrist_hash(s); + let mut good_move = None; + if let Some(value) = self.table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) { + return Some(value); + } + + if self.null_move_check(s, depth, beta)? 
>= beta { + return Some(beta); + } + + let mut moves = self.move_pool.alloc(); + E::G::generate_moves(s, &mut moves); + self.stats.generate_moves(moves.len()); + if moves.is_empty() { + self.move_pool.free(moves); + return Some(WORST_EVAL); + } + + // TODO: Also do a pre-search to look for moves much better than others. + if self.opts.singular_extension && moves.len() == 1 { + depth += 1; + } + + // Reorder moves. + if depth >= self.opts.min_reorder_moves_depth { + // TODO reorder moves + } + self.countermoves.reorder(prev_move, &mut moves); + if let Some(good) = good_move { + move_to_front(good, &mut moves); + } + + let mut best = WORST_EVAL; + let mut best_move = moves[0]; + let mut null_window = false; + for &m in moves.iter() { + let mut new = AppliedMove::::new(s, m); + let value = if null_window { + let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; + if probe > alpha && probe < beta { + // Full search fallback. + -self.negamax(&mut new, Some(m), depth - 1, -beta, -probe)? + } else { + probe + } + } else { + -self.negamax(&mut new, Some(m), depth - 1, -beta, -alpha)? + }; + if value > best { + best = value; + best_move = m; + } + if value > alpha { + alpha = value; + // Now that we've found a good move, assume following moves + // are worse, and seek to cull them without full evaluation. + null_window = self.opts.null_window_search; + } + if alpha >= beta { + self.countermoves.update(prev_move, m); + break; + } + } + + self.table.update(hash, alpha_orig, beta, depth, best, best_move); + self.move_pool.free(moves); + Some(clamp_value(best)) + } + + // Try to find the value within a window around the estimated value. + // Results, whether exact, overshoot, or undershoot, are stored in the table. + pub(super) fn aspiration_search( + &mut self, s: &mut ::S, depth: u8, target: Evaluation, window: Evaluation, + ) -> Option<()> { + if depth < 2 { + // Do a full search on shallow nodes to establish the target. 
+ return Some(()); + } + let alpha = max(target.saturating_sub(window), WORST_EVAL); + let beta = target.saturating_add(window); + self.negamax(s, None, depth, alpha, beta)?; + Some(()) + } + + pub(super) fn search_and_reorder( + &mut self, s: &mut ::S, moves: &mut [ValueMove<::M>], depth: u8, + ) -> Option { + let mut alpha = WORST_EVAL; + let beta = BEST_EVAL; + for value_move in moves.iter_mut() { + let mut new = AppliedMove::::new(s, value_move.m); + let value = -self.negamax(&mut new, Some(value_move.m), depth - 1, -beta, -alpha)?; + + alpha = max(alpha, value); + value_move.value = value; + } + moves.sort_by_key(|vm| -vm.value); + self.table.update(E::G::zobrist_hash(s), alpha, beta, depth, moves[0].value, moves[0].m); + Some(moves[0].value) + } +} + +pub struct IterativeSearch { + max_depth: u8, + max_time: Duration, + negamaxer: Negamaxer::M>>, + prev_value: Evaluation, + opts: IterativeOptions, + + // Runtime stats for the last move generated. + + // Maximum depth used to produce the move. + actual_depth: u8, + // Nodes explored at each depth. + nodes_explored: Vec, + pv: Vec<::M>, + wall_time: Duration, +} + +impl IterativeSearch +where + ::M: Copy + Eq, + ::S: Clone, +{ + pub fn new(eval: E, opts: IterativeOptions) -> IterativeSearch { + let table = TranspositionTable::new(opts.table_byte_size, opts.strategy); + let negamaxer = Negamaxer::new(table, eval, opts); + IterativeSearch { + max_depth: 99, + max_time: Duration::from_secs(5), + prev_value: 0, + negamaxer, + opts, + actual_depth: 0, + nodes_explored: Vec::new(), + pv: Vec::new(), + wall_time: Duration::default(), + } + } + + /// Return a human-readable summary of the last move generation. 
+ pub fn stats(&self, s: &mut ::S) -> String { + let total_nodes_explored: u64 = self.nodes_explored.iter().sum(); + let mean_branching_factor = self.negamaxer.stats.total_generated_moves as f64 + / self.negamaxer.stats.total_generate_move_calls as f64; + let effective_branching_factor = (*self.nodes_explored.last().unwrap_or(&0) as f64) + .powf((self.actual_depth as f64 + 1.0).recip()); + let throughput = (total_nodes_explored + self.negamaxer.stats.nodes_explored) as f64 + / self.wall_time.as_secs_f64(); + format!("Principal variation: {}\nExplored {} nodes to depth {}. MBF={:.1} EBF={:.1}\nPartial exploration of next depth hit {} nodes.\n{} nodes/sec", + pv_string::(&self.pv[..], s), + total_nodes_explored, self.actual_depth, mean_branching_factor, effective_branching_factor, + self.negamaxer.stats.nodes_explored, throughput as usize) + } + + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + fn mtdf( + &mut self, s: &mut ::S, depth: u8, mut guess: Evaluation, + ) -> Option { + let mut lowerbound = WORST_EVAL; + let mut upperbound = BEST_EVAL; + while lowerbound < upperbound { + let beta = max(lowerbound + 1, guess); + if self.opts.verbose { + eprintln!( + "mtdf depth={} guess={} bounds={}:{}", + depth, beta, lowerbound, upperbound + ); + } + guess = self.negamaxer.negamax(s, None, depth, beta - 1, beta)?; + if guess < beta { + upperbound = guess; + } else { + lowerbound = guess; + } + } + Some(guess) + } +} + +impl Strategy for IterativeSearch +where + ::S: Clone, + ::M: Copy + Eq, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + if E::G::get_winner(s).is_some() { + return None; + } + self.negamaxer.table.advance_generation(); + self.negamaxer.countermoves.advance_generation(E::G::null_move(s)); + // Reset stats. + self.nodes_explored.clear(); + self.negamaxer.stats.reset(); + self.actual_depth = 0; + let start_time = Instant::now(); + // Start timer if configured. 
+ self.negamaxer.reset_timeout(self.max_time); + + let root_hash = E::G::zobrist_hash(s); + let mut s_clone = s.clone(); + let mut best_move = None; + let mut interval_start; + // Store the moves so they can be reordered every iteration. + let mut moves = Vec::new(); + E::G::generate_moves(&s_clone, &mut moves); + // Start in a random order. + moves.shuffle(&mut rand::rng()); + let mut moves = moves.into_iter().map(|m| ValueMove::new(0, m)).collect::>(); + + // Start at 1 or 2 to hit the max depth. + let mut depth = self.max_depth % self.opts.step_increment; + if depth == 0 { + depth = self.opts.step_increment; + } + while depth <= self.max_depth { + interval_start = Instant::now(); + let search = if self.opts.mtdf { + self.mtdf(&mut s_clone, depth, self.prev_value) + } else { + if let Some(window) = self.opts.aspiration_window { + // Results of the search are stored in the table. + if self + .negamaxer + .aspiration_search(&mut s_clone, depth, self.prev_value, window) + .is_none() + { + // Timeout. + break; + } + if self.opts.verbose { + if let Some(entry) = self.negamaxer.table.lookup(root_hash) { + let end = Instant::now(); + let interval = end - interval_start; + eprintln!( + "Iterative aspiration depth{:>2} took{:>5}ms; bounds{:>5}; bestmove={}", + depth, + interval.as_millis(), + entry.bounds(), + move_id::(&s_clone, entry.best_move) + ); + interval_start = end; + } + } + } + + self.negamaxer.search_and_reorder(&mut s_clone, &mut moves[..], depth) + }; + if search.is_none() { + // Timeout. Return the best move from the previous depth. 
+ break; + } + let entry = self.negamaxer.table.lookup(root_hash).unwrap(); + best_move = entry.best_move; + + if self.opts.verbose { + let interval = Instant::now() - interval_start; + eprintln!( + "Iterative fullsearch depth{:>2} took{:>5}ms; value{:>6}; bestmove={}", + depth, + interval.as_millis(), + entry.value_string(), + move_id::(&s_clone, best_move) + ); + } + + self.actual_depth = max(self.actual_depth, depth); + self.nodes_explored.push(self.negamaxer.stats.nodes_explored); + self.negamaxer.stats.nodes_explored = 0; + self.prev_value = entry.value; + depth += self.opts.step_increment; + self.negamaxer.table.populate_pv::(&mut self.pv, &s_clone); + if unclamp_value(entry.value).abs() == BEST_EVAL { + break; + } + } + self.wall_time = start_time.elapsed(); + if self.opts.verbose { + let mut s_clone = s.clone(); + eprintln!("{}", self.stats(&mut s_clone)); + } + best_move + } + + fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 99; + } + + fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + fn principal_variation(&self) -> Vec<::M> { + self.pv.clone() + } +} diff --git a/src/strategies/mcts.rs b/src/strategies/mcts.rs new file mode 100644 index 0000000..a8a5161 --- /dev/null +++ b/src/strategies/mcts.rs @@ -0,0 +1,470 @@ +use super::super::interface::*; +use super::super::util::AppliedMove; +use super::common::{move_id, pv_string, random_best}; +use super::sync_util::*; + +use rand::prelude::IndexedRandom; +use rand::rngs::SmallRng; +use rand::SeedableRng; +use std::marker::PhantomData; +use std::sync::atomic::Ordering::{Relaxed, SeqCst}; +use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32}; +use std::sync::Arc; +use std::thread; +use std::time::{Duration, Instant}; + +const WIN: i32 = i32::MAX; +// Make sure they negate to each other, unlike i32::MIN. +const LOSS: i32 = -WIN; + +struct Node { + // The Move to get from the parent to here. 
+ // Only None at the root. + m: Option, + visits: AtomicU32, + // +1 for wins, -1 for losses, +0 for draws. + // From perspective of the player that made this move. + score: AtomicI32, + // Lazily populated if this node guarantees a particular end state. + // WIN for a guaranteed win, LOSS for a guaranteed loss. + // Not bothering with draws. + winner: AtomicI32, + // Lazily populated. + expansion: AtomicBox>, +} + +struct NodeExpansion { + children: Vec>, +} + +fn new_expansion(state: &G::S) -> Box> { + let mut moves = Vec::new(); + G::generate_moves(state, &mut moves); + let children = moves.into_iter().map(|m| Node::new(Some(m))).collect::>(); + Box::new(NodeExpansion { children }) +} + +impl Node { + fn new(m: Option) -> Self { + Node { + m, + expansion: AtomicBox::default(), + visits: AtomicU32::new(0), + score: AtomicI32::new(0), + winner: AtomicI32::new(0), + } + } + + // Choose best child based on UCT. + fn best_child(&self, exploration_score: f32) -> Option<&Node> { + let mut log_visits = (self.visits.load(SeqCst) as f32).log2(); + // Keep this numerator non-negative. + if log_visits < 0.0 { + log_visits = 0.0; + } + + let expansion = self.expansion.get()?; + random_best(expansion.children.as_slice(), |node| { + node.uct_score(exploration_score, log_visits) + }) + } + + fn uct_score(&self, exploration_score: f32, log_parent_visits: f32) -> f32 { + let winner = self.winner.load(Relaxed); + if winner < 0 { + // Large enough to be returned from best_move, smaller than any other value. + // This effectively ignores any moves that we've proved guarantee losses. + // The MCTS-Solver paper says not to do this, but I don't buy their argument. + // Those moves effectivey won't exist in our search, and we'll + // have to see if the remaining moves make the parent moves worthwhile. 
+ return -1.0; + } + if winner > 0 { + return f32::INFINITY; + } + let visits = self.visits.load(Relaxed) as f32; + let score = self.score.load(Relaxed) as f32; + if visits == 0.0 { + // Avoid NaNs. + return if exploration_score > 0.0 { f32::INFINITY } else { 0.0 }; + } + let win_ratio = (score + visits) / (2.0 * visits); + win_ratio + exploration_score * (2.0 * log_parent_visits / visits).sqrt() + } + + fn pre_update_stats(&self) { + // Use a technicque called virtual loss to assume we've lost any + // ongoing simulation to bias concurrent threads against exploring it. + self.visits.fetch_add(1, SeqCst); + self.score.fetch_add(-1, SeqCst); + } + + fn update_stats(&self, result: i32) -> Option { + if result == WIN || result == LOSS { + self.winner.store(result, SeqCst); + } else { + // Adjust for virtual loss. + self.score.fetch_add(result + 1, SeqCst); + } + // Always return Some, as we aren't timed out. + Some(result) + } +} + +/// Options for MonteCarloTreeSearch. +#[derive(Clone)] +pub struct MCTSOptions { + pub verbose: bool, + max_rollout_depth: u32, + rollouts_before_expanding: u32, + // None means use num_cpus. + num_threads: Option, +} + +impl Default for MCTSOptions { + fn default() -> Self { + Self { + verbose: false, + max_rollout_depth: 100, + rollouts_before_expanding: 0, + num_threads: None, + } + } +} + +impl MCTSOptions { + /// Enable verbose print statements after each search. + pub fn verbose(mut self) -> Self { + self.verbose = true; + self + } + + /// Set a maximum depth for rollouts. Rollouts that reach this depth are + /// stopped and assigned a Draw value. + pub fn with_max_rollout_depth(mut self, depth: u32) -> Self { + self.max_rollout_depth = depth; + self + } + + /// How many rollouts to run on a single leaf node before expanding its + /// children. The default value is 0, where every rollout expands some + /// leaf node. 
+ pub fn with_rollouts_before_expanding(mut self, rollouts: u32) -> Self { + self.rollouts_before_expanding = rollouts; + self + } + + /// How many threads to run. Defaults to num_cpus. + pub fn with_num_threads(mut self, threads: usize) -> Self { + self.num_threads = Some(threads); + self + } +} + +/// Advanced random rollout policy for Monte Carlo Tree Search. +pub trait RolloutPolicy { + /// The type of game that can be evaluated. + type G: Game; + + /// Custom function to choose random move during rollouts. + /// Implementations can bias towards certain moves, ensure winning moves, etc. + /// The provided move vec is for scratch space. + fn random_move( + &self, state: &mut ::S, move_scratch: &mut Vec<::M>, + rng: &mut SmallRng, + ) -> ::M; + + /// Implementation of a rollout over many random moves. Not needed to be overridden. + fn rollout(&self, options: &MCTSOptions, state: &::S) -> i32 + where + ::S: Clone, + { + let mut rng = SmallRng::from_rng(&mut rand::rng()); + let mut depth = options.max_rollout_depth; + let mut state = state.clone(); + let mut moves = Vec::new(); + let mut sign = 1; + loop { + if let Some(winner) = Self::G::get_winner(&state) { + let first = depth == options.max_rollout_depth; + return match winner { + Winner::PlayerJustMoved => { + if first { + WIN + } else { + 1 + } + } + Winner::PlayerToMove => { + if first { + LOSS + } else { + -1 + } + } + Winner::Draw => 0, + } * sign; + } + + if depth == 0 { + return 0; + } + + moves.clear(); + let m = self.random_move(&mut state, &mut moves, &mut rng); + if let Some(new_state) = Self::G::apply(&mut state, m) { + state = new_state; + } + sign = -sign; + depth -= 1; + } + } +} + +struct DumbRolloutPolicy { + game_type: PhantomData, +} + +impl RolloutPolicy for DumbRolloutPolicy { + type G = G; + fn random_move( + &self, state: &mut ::S, moves: &mut Vec<::M>, + rng: &mut SmallRng, + ) -> ::M { + G::generate_moves(state, moves); + *moves.choose(rng).unwrap() + } +} + +/// A strategy that uses 
random playouts to explore the game tree to decide on the best move. +/// This can be used without an Evaluator, just using the rules of the game. +pub struct MonteCarloTreeSearch { + options: MCTSOptions, + max_rollouts: u32, + max_time: Duration, + timeout: Arc, + rollout_policy: Option + Send + Sync>>, + pv: Vec, + game_type: PhantomData, +} + +impl MonteCarloTreeSearch { + pub fn new(options: MCTSOptions) -> Self { + Self { + options, + max_rollouts: 0, + max_time: Duration::from_secs(5), + timeout: Arc::new(AtomicBool::new(false)), + rollout_policy: None, + pv: Vec::new(), + game_type: PhantomData, + } + } + + /// Create a searcher with a custom rollout policy. You could bias the + /// random move generation to prefer certain kinds of moves, always choose + /// winning moves, etc. + pub fn new_with_policy( + options: MCTSOptions, policy: Box + Send + Sync>, + ) -> Self { + Self { + options, + max_rollouts: 0, + max_time: Duration::from_secs(5), + timeout: Arc::new(AtomicBool::new(false)), + rollout_policy: Some(policy), + pv: Vec::new(), + game_type: PhantomData, + } + } + + /// Instead of a timeout, run this many rollouts to choose a move. + pub fn set_max_rollouts(&mut self, rollouts: u32) { + self.max_time = Duration::default(); + self.max_rollouts = rollouts; + } + + fn rollout(&self, state: &G::S) -> i32 + where + G: Sync, + G::S: Clone, + { + self.rollout_policy.as_ref().map(|p| p.rollout(&self.options, state)).unwrap_or_else(|| { + DumbRolloutPolicy:: { game_type: PhantomData }.rollout(&self.options, state) + }) + } + + // Explore the tree, make a new node, rollout, backpropagate. 
+ fn simulate(&self, node: &Node, state: &mut G::S, mut force_rollout: bool) -> Option + where + G: Sync, + G::S: Clone, + { + if self.timeout.load(Relaxed) { + return None; + } + let winner = node.winner.load(Relaxed); + if winner != 0 { + return Some(winner); + } + node.pre_update_stats(); + + if force_rollout { + return node.update_stats(self.rollout(state)); + } + + let expansion = match node.expansion.get() { + Some(expansion) => expansion, + None => { + // This is a leaf node. + if node.visits.load(SeqCst) <= self.options.rollouts_before_expanding { + // Just rollout from here. + return node.update_stats(self.rollout(state)); + } else { + // Check for terminal node. + match G::get_winner(state) { + Some(Winner::PlayerJustMoved) => return node.update_stats(WIN), + Some(Winner::PlayerToMove) => return node.update_stats(LOSS), + Some(Winner::Draw) => return node.update_stats(0), + _ => {} + } + // Expand this node, and force a rollout when we recurse. + force_rollout = true; + node.expansion.try_set(new_expansion::(state)) + } + } + }; + + // Recurse. + let next = match node.best_child(1.) { + Some(child) => child, + // TODO: Weird race condition? + None => return Some(0), + }; + let m = next.m.as_ref().unwrap(); + let mut new = AppliedMove::::new(state, *m); + let child_result = self.simulate(next, &mut new, force_rollout)?; + + // Propagate up forced wins and losses. + let result = if child_result == WIN { + // Having a guaranteed win child makes you a loser parent. + LOSS + } else if child_result == LOSS { + // Having all guaranteed loser children makes you a winner parent. + if expansion.children.iter().all(|node| node.winner.load(Relaxed) == LOSS) { + WIN + } else { + -1 + } + } else { + -child_result + }; + + // Backpropagate. 
+ node.update_stats(result) + } +} + +impl Strategy for MonteCarloTreeSearch +where + G: Sync, + G::S: Clone + Send, + G::M: Copy + Sync, +{ + fn choose_move(&mut self, s: &G::S) -> Option { + let start_time = Instant::now(); + let root = Box::new(Node::::new(None)); + root.expansion.try_set(new_expansion::(s)); + + let num_threads = self.options.num_threads.unwrap_or_else(num_cpus::get) as u32; + let (rollouts_per_thread, extra) = if self.max_rollouts == 0 { + (u32::MAX, 0) + } else { + let rollouts_per_thread = self.max_rollouts / num_threads; + (rollouts_per_thread, self.max_rollouts - rollouts_per_thread * num_threads) + }; + self.timeout = if self.max_time == Duration::default() { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(self.max_time) + }; + + thread::scope(|scope| { + for i in 0..num_threads { + let node = &*root; + let mtcs = &*self; + let mut state = s.clone(); + scope.spawn(move || { + let rollouts = rollouts_per_thread + (i < extra) as u32; + for _ in 0..rollouts { + if mtcs.simulate(node, &mut state, false).is_none() { + break; + } + } + }); + } + }); + + // Compute PV. + self.pv.clear(); + let mut node = &*root; + while let Some(best) = node.best_child(0.0) { + self.pv.push(best.m.unwrap()); + node = best; + } + + if self.options.verbose { + let total_visits = root.visits.load(Relaxed); + let duration = Instant::now().duration_since(start_time); + let rate = total_visits as f64 / num_threads as f64 / duration.as_secs_f64(); + eprintln!( + "Using {} threads, did {} total simulations with {:.1} rollouts/sec/core", + num_threads, total_visits, rate + ); + // Sort moves by visit count, largest first. + let mut children = root + .expansion + .get()? + .children + .iter() + .map(|node| (node.visits.load(Relaxed), node.score.load(Relaxed), node.m)) + .collect::>(); + children.sort_by_key(|t| !t.0); + + // Dump stats about the top 10 nodes. 
+ for (visits, score, m) in children.into_iter().take(10) { + // Normalized so all wins is 100%, all draws is 50%, and all losses is 0%. + let win_rate = (score as f64 + visits as f64) / (visits as f64 * 2.0); + eprintln!( + "{:>6} visits, {:.02}% wins: {}", + visits, + win_rate * 100.0, + move_id::(s, m) + ); + } + + // Dump PV. + eprintln!("Principal variation: {}", pv_string::(&self.pv[..], s)); + } + + let exploration = 0.0; // Just get best node. + root.best_child(exploration).map(|node| node.m.unwrap()) + } + + fn set_timeout(&mut self, timeout: Duration) { + self.max_rollouts = 0; + self.max_time = timeout; + } + + fn set_max_depth(&mut self, depth: u8) { + // Set some arbitrary function of rollouts. + self.max_time = Duration::default(); + self.max_rollouts = 5u32 + .saturating_pow(depth as u32) + .saturating_mul(self.options.rollouts_before_expanding + 1); + } + + fn principal_variation(&self) -> Vec { + self.pv.clone() + } +} diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs index 8855f06..c7a292d 100644 --- a/src/strategies/mod.rs +++ b/src/strategies/mod.rs @@ -1,4 +1,14 @@ //! Strategy implementations. +pub mod iterative; +#[cfg(not(target_arch = "wasm32"))] +pub mod mcts; pub mod negamax; pub mod random; +#[cfg(not(target_arch = "wasm32"))] +pub mod ybw; + +mod common; +#[cfg(not(target_arch = "wasm32"))] +mod sync_util; +mod table; diff --git a/src/strategies/negamax.rs b/src/strategies/negamax.rs index a9c0778..878eb0b 100644 --- a/src/strategies/negamax.rs +++ b/src/strategies/negamax.rs @@ -1,97 +1,104 @@ //! An implementation of Negamax. //! -//! Currently, only the basic alpha-pruning variant is implemented. Further work -//! could add advanced features, like history and/or transposition tables. This -//! picks randomly among the "best" moves, so that it's non-deterministic. +//! With only the basic alpha-pruning implemented. This picks randomly among +//! the "best" moves, so that it's non-deterministic. 
use super::super::interface::*; -use rand; -use rand::Rng; +use super::super::util::*; +use super::common::*; +use rand::seq::SliceRandom; use std::cmp::max; -use std::marker::PhantomData; -fn negamax(s: &mut ::S, - depth: usize, - mut alpha: Evaluation, - beta: Evaluation, - p: Player) - -> Evaluation - where <::G as Game>::M: Copy -{ - let maybe_winner = E::G::get_winner(s); - if depth == 0 || maybe_winner.is_some() { - return p * E::evaluate(s, maybe_winner); - } - let mut moves = [None; 100]; - E::G::generate_moves(s, p, &mut moves); - let mut best = Evaluation::Worst; - for m in moves.iter().take_while(|om| om.is_some()).map(|om| om.unwrap()) { - m.apply(s); - let value = -negamax::(s, depth - 1, -beta, -alpha, -p); - m.undo(s); - best = max(best, value); - alpha = max(alpha, value); - if alpha >= beta { - break - } - } - best -} - -/// Options to use for the `Negamax` engine. -pub struct Options { - /// The maximum depth within the game tree. - pub max_depth: usize, -} - -pub struct Negamax { - opts: Options, - rng: rand::ThreadRng, - _eval: PhantomData, +pub struct Negamax { + max_depth: u8, + move_pool: MovePool<::M>, + rng: rand::rngs::ThreadRng, + prev_value: Evaluation, + eval: E, } impl Negamax { - pub fn new(opts: Options) -> Negamax { + pub fn new(eval: E, depth: u8) -> Negamax { Negamax { - opts: opts, - rng: rand::thread_rng(), - _eval: PhantomData, + max_depth: depth, + move_pool: MovePool::<_>::default(), + rng: rand::rng(), + prev_value: 0, + eval, } } + + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + fn negamax( + &mut self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, + ) -> Evaluation + where + <::G as Game>::M: Copy, + { + if let Some(winner) = E::G::get_winner(s) { + return winner.evaluate(); + } + if depth == 0 { + return self.eval.evaluate(s); + } + let mut moves = self.move_pool.alloc(); + E::G::generate_moves(s, &mut moves); + let mut best = WORST_EVAL; + for m in 
moves.iter() { + let mut new = AppliedMove::::new(s, *m); + let value = -self.negamax(&mut new, depth - 1, -beta, -alpha); + best = max(best, value); + alpha = max(alpha, value); + if alpha >= beta { + break; + } + } + self.move_pool.free(moves); + clamp_value(best) + } } impl Strategy for Negamax - where ::S: Clone, - ::M: Copy { - fn choose_move(&mut self, s: &::S, p: Player) -> Option<::M> { - let mut best = Evaluation::Worst; - let mut moves = [None; 100]; - E::G::generate_moves(s, p, &mut moves); - let mut candidate_moves = Vec::new(); +where + ::S: Clone, + ::M: Copy, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + if self.max_depth == 0 { + return None; + } + if E::G::get_winner(s).is_some() { + return None; + } + let mut best = WORST_EVAL; + let mut moves = self.move_pool.alloc(); + E::G::generate_moves(s, &mut moves); + // Randomly permute order that we look at the moves. + // We'll pick the first best score from this list. + moves.shuffle(&mut self.rng); + + let mut best_move = *moves.first()?; let mut s_clone = s.clone(); - for m in moves.iter().take_while(|m| m.is_some()).map(|m| m.unwrap()) { + for &m in moves.iter() { // determine value for this move - m.apply(&mut s_clone); - let value = -negamax::(&mut s_clone, - self.opts.max_depth, - Evaluation::Worst, - Evaluation::Best, - -p); - m.undo(&mut s_clone); - // this move is a candidate move - if value == best { - candidate_moves.push(m); - // this move is better than any previous, so it's the sole candidate - } else if value > best { - candidate_moves.clear(); - candidate_moves.push(m); + let mut new = AppliedMove::::new(&mut s_clone, m); + let value = -self.negamax(&mut new, self.max_depth - 1, WORST_EVAL, -best); + // Strictly better than any move found so far. 
+ if value > best { best = value; + best_move = m; } } - if candidate_moves.is_empty() { - None - } else { - Some(candidate_moves[self.rng.gen_range(0, candidate_moves.len())]) - } + self.move_pool.free(moves); + self.prev_value = best; + Some(best_move) + } + + fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; } } diff --git a/src/strategies/random.rs b/src/strategies/random.rs index 56b560c..974d614 100644 --- a/src/strategies/random.rs +++ b/src/strategies/random.rs @@ -1,26 +1,32 @@ //! A strategy that randomly chooses a move, for use in tests. use super::super::interface::*; -use rand; -use rand::Rng; +use rand::prelude::IndexedRandom; +use std::marker::PhantomData; -pub struct Random { - rng: rand::ThreadRng, +pub struct Random { + game_type: PhantomData, } -impl Random { - pub fn new() -> Random { - Random { rng: rand::thread_rng() } +impl Random { + pub fn new() -> Self { + Self { game_type: PhantomData } } } -impl Strategy for Random - where G::M: Copy { - fn choose_move(&mut self, s: &G::S, p: Player) -> Option { - let mut moves: [Option; 100] = [None; 100]; - match G::generate_moves(s, p, &mut moves) { - 0 => None, - num_moves => Some(moves[self.rng.gen_range(0, num_moves)].unwrap()), - } +impl Default for Random { + fn default() -> Self { + Random::new() + } +} + +impl Strategy for Random +where + G::M: Copy, +{ + fn choose_move(&mut self, s: &G::S) -> Option { + let mut moves = Vec::new(); + G::generate_moves(s, &mut moves); + moves.choose(&mut rand::rng()).copied() } } diff --git a/src/strategies/sync_util.rs b/src/strategies/sync_util.rs new file mode 100644 index 0000000..cc0b3f2 --- /dev/null +++ b/src/strategies/sync_util.rs @@ -0,0 +1,167 @@ +use std::ops::{Deref, DerefMut}; +use std::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; +use std::sync::Arc; +use std::thread::{sleep, spawn}; +use std::time::Duration; + +use rayon::prelude::*; + +pub(super) fn timeout_signal(dur: Duration) -> Arc { + // Theoretically we 
could include an async runtime to do this and use + // fewer threads, but the stdlib implementation is only a few lines... + let signal = Arc::new(AtomicBool::new(false)); + let signal2 = signal.clone(); + spawn(move || { + sleep(dur); + signal2.store(true, Ordering::Relaxed); + }); + signal +} + +// An insert-only lock-free Option> +pub(super) struct AtomicBox(AtomicPtr); + +impl Default for AtomicBox { + fn default() -> Self { + Self(AtomicPtr::default()) + } +} + +impl AtomicBox { + // Tries to set the AtomicBox to this value if empty. + // Returns a reference to whatever is in the box. + pub(super) fn try_set(&self, value: Box) -> &T { + let ptr = Box::into_raw(value); + // Try to replace nullptr with the value. + let ret_ptr = if let Err(new_ptr) = + self.0.compare_exchange(std::ptr::null_mut(), ptr, Ordering::SeqCst, Ordering::SeqCst) + { + // If someone beat us to it, return the original drop the new one. + unsafe { drop(Box::from_raw(ptr)) }; + new_ptr + } else { + ptr + }; + unsafe { ret_ptr.as_ref().unwrap() } + } + + pub(super) fn get(&self) -> Option<&T> { + let ptr = self.0.load(Ordering::Relaxed); + unsafe { ptr.as_ref() } + } +} + +impl Drop for AtomicBox { + fn drop(&mut self) { + let ptr = *self.0.get_mut(); + if !ptr.is_null() { + unsafe { drop(Box::from_raw(ptr)) }; + } + } +} + +#[test] +fn test_atomic_box() { + let b = AtomicBox::::default(); + assert_eq!(None, b.get()); + b.try_set(Box::new(3)); + assert_eq!(Some(&3), b.get()); + b.try_set(Box::new(4)); + assert_eq!(Some(&3), b.get()); +} + +// Safe API around lockfree threadlocals for rayon threadpools. +// Only safe when used from a single threadpool, as this crate does. +pub(super) struct ThreadLocal { + // Our owned reference to all the locals. + locals: Vec, + // Mutable reference from which each thread finds its local. + ptr: *mut T, +} + +// Values are only accessed from their individual threads and references do not leak. 
+unsafe impl Send for ThreadLocal {} +unsafe impl Sync for ThreadLocal {} + +impl ThreadLocal { + pub(super) fn new T>(f: F, pool: &rayon::ThreadPool) -> Self { + let n = pool.current_num_threads(); + let mut locals = (0..n).map(|_| f()).collect::>(); + let ptr = locals.as_mut_ptr(); + Self { locals, ptr } + } + + pub(super) fn local_do(&self, f: F) { + // It would be nice to keep a handle to the threadpool to ensure this + // thread is from only our pool, but the lifetimes seem too + // restrictive. + let index = rayon::current_thread_index().unwrap(); + assert!(index < self.locals.len()); + f(unsafe { self.ptr.add(index).as_mut().unwrap() }); + } + + // With a &mut self, no other threads can be using it. + pub(super) fn do_all_mut(&mut self, f: F) { + self.locals.iter_mut().for_each(f); + } + + // This is extremely suspicious, but parallel-reads mapping each threadlocal + // into an output element. Good for interior mutability tasks. + pub(super) fn do_all(&self, f: F) { + self.locals.iter().for_each(f); + } +} + +#[test] +fn test_threadlocal() { + use rayon::prelude::*; + let pool = rayon::ThreadPoolBuilder::new().build().unwrap(); + let mut tls = ThreadLocal::::new(|| 0, &pool); + let count = 100000; + (0..count).into_par_iter().for_each(|_| tls.local_do(|x| *x += 1)); + let mut sum = 0; + tls.do_all_mut(|x| sum += *x); + assert_eq!(sum, count); + + let result = std::panic::catch_unwind(|| { + // Invalid call from outside pool. + tls.local_do(|x| *x += 1); + }); + assert!(result.is_err()); +} + +// 64-bytes is a common cache line size. 
+#[repr(align(64))] +pub(super) struct CachePadded { + value: T, +} + +impl Default for CachePadded { + fn default() -> Self { + Self { value: T::default() } + } +} + +impl Deref for CachePadded { + type Target = T; + + fn deref(&self) -> &T { + &self.value + } +} + +impl DerefMut for CachePadded { + fn deref_mut(&mut self) -> &mut T { + &mut self.value + } +} + +/// Iterate through the given array in parallel, but whenever any +/// thread starts a new item it gets the next one in line. +pub(super) fn par_iter_in_order(array: &[T]) -> impl ParallelIterator { + let index = AtomicUsize::new(0); + (0..array.len()) + .into_par_iter() + .with_max_len(1) + .map(move |_| &array[index.fetch_add(1, Ordering::SeqCst)]) +} diff --git a/src/strategies/table.rs b/src/strategies/table.rs new file mode 100644 index 0000000..76d8363 --- /dev/null +++ b/src/strategies/table.rs @@ -0,0 +1,441 @@ +use super::common::{move_to_front, unclamp_value}; +use crate::interface::*; +use std::cmp::{max, min}; +use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; +use std::sync::Arc; + +// Common transposition table stuff. 
+ +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(super) enum EntryFlag { + Exact, + Upperbound, + Lowerbound, +} + +#[derive(Copy, Clone)] +#[repr(align(16))] +pub(super) struct Entry { + pub(super) high_hash: u32, + pub(super) value: Evaluation, + pub(super) depth: u8, + pub(super) flag: EntryFlag, + pub(super) generation: u8, + pub(super) best_move: Option, +} + +#[test] +fn test_entry_size() { + assert!(std::mem::size_of::>() <= 16); + assert!(std::mem::size_of::>() <= 16); +} + +pub(super) fn high_bits(hash: u64) -> u32 { + (hash >> 32) as u32 +} + +impl Entry { + pub(super) fn value_string(&self) -> String { + match unclamp_value(self.value) { + WORST_EVAL => "-∞".to_owned(), + BEST_EVAL => "∞".to_owned(), + value => value.to_string(), + } + } + + pub(super) fn bounds(&self) -> String { + match self.flag { + EntryFlag::Exact => "=", + EntryFlag::Upperbound => "≤", + EntryFlag::Lowerbound => "≥", + } + .to_string() + + &self.value_string() + } +} + +// A trait for a transposition table. The methods are mutual exclusion, but +// the idea is that an implementation can wrap a shared concurrent table. +pub(super) trait Table { + fn lookup(&self, hash: u64) -> Option>; + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M); + fn advance_generation(&mut self); + + // Check and update negamax state based on any transposition table hit. + // Returns Some(value) on an exact match. + // Returns None, updating mutable arguments, if Negamax should continue to explore this node. 
+ fn check( + &self, hash: u64, depth: u8, good_move: &mut Option, alpha: &mut Evaluation, + beta: &mut Evaluation, + ) -> Option { + if let Some(entry) = self.lookup(hash) { + *good_move = entry.best_move; + if entry.depth >= depth { + match entry.flag { + EntryFlag::Exact => { + return Some(entry.value); + } + EntryFlag::Lowerbound => { + *alpha = max(*alpha, entry.value); + } + EntryFlag::Upperbound => { + *beta = min(*beta, entry.value); + } + } + if *alpha >= *beta { + return Some(entry.value); + } + } + } + None + } + + // Update table based on negamax results. + fn update( + &mut self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, + best: Evaluation, best_move: M, + ) { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.store(hash, best, depth, flag, best_move); + } + + // After finishing a search, populate the principal variation as deep as + // the table remembers it. + fn populate_pv>(&self, pv: &mut Vec, state: &G::S) + where + G::S: Clone, + { + pv.clear(); + let mut hash_history = Vec::new(); + let mut state = state.clone(); + let mut hash = G::zobrist_hash(&state); + while let Some(entry) = self.lookup(hash) { + // The principal variation should only have exact nodes, as other + // node types are from cutoffs where the node is proven to be + // worse than a previously explored one. + // + // Sometimes, it takes multiple rounds of narrowing bounds for the + // value to be exact, and we can't guarantee that the table entry + // will remain in the table between the searches that find + // equivalent upper and lower bounds. + let m = entry.best_move.unwrap(); + pv.push(m); + if let Some(new_state) = G::apply(&mut state, m) { + state = new_state; + } + hash = G::zobrist_hash(&state); + // Prevent cyclical PVs from being infinitely long. 
+ if hash_history.contains(&hash) { + break; + } + hash_history.push(hash); + } + } +} + +pub(super) trait ConcurrentTable { + fn concurrent_store( + &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, + ); + fn concurrent_advance_generation(&self); + + // Update table based on negamax results. + fn concurrent_update( + &self, hash: u64, alpha_orig: Evaluation, beta: Evaluation, depth: u8, best: Evaluation, + best_move: M, + ) { + let flag = if best <= alpha_orig { + EntryFlag::Upperbound + } else if best >= beta { + EntryFlag::Lowerbound + } else { + EntryFlag::Exact + }; + self.concurrent_store(hash, best, depth, flag, best_move); + } +} + +impl + ConcurrentTable> Table for Arc { + fn lookup(&self, hash: u64) -> Option> { + (**self).lookup(hash) + } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } +} + +// A concurrent table that doesn't bother to use atomic operations to access its entries. +// It's crazily unsafe, but somehow StockFish gets away with this? +pub(super) struct RacyTable { + table: Vec>, + mask: usize, + // Incremented for each iterative deepening run. + // Values from old generations are always overwritten. 
+ generation: AtomicU8, +} + +#[allow(dead_code)] +impl RacyTable { + pub(super) fn new(table_byte_size: usize) -> Self { + let size = (table_byte_size / std::mem::size_of::>()).next_power_of_two(); + let mask = size - 1; + let mut table = Vec::with_capacity(size); + for _ in 0..size { + table.push(Entry:: { + high_hash: 0, + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + }); + } + Self { table, mask, generation: AtomicU8::new(0) } + } +} + +impl Table for RacyTable { + fn lookup(&self, hash: u64) -> Option> { + let index = (hash as usize) & self.mask; + let entry = self.table[index]; + if high_bits(hash) == entry.high_hash { + return Some(entry); + } + None + } + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } +} + +impl ConcurrentTable for RacyTable { + fn concurrent_store( + &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, + ) { + let table_gen = self.generation.load(Ordering::Relaxed); + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + if entry.generation != table_gen || entry.depth <= depth { + #[allow(mutable_transmutes)] + let ptr = unsafe { std::mem::transmute::<&Entry, &mut Entry>(entry) }; + *ptr = Entry { + high_hash: high_bits(hash), + value, + depth, + flag, + generation: table_gen, + best_move: Some(best_move), + }; + } + } + + fn concurrent_advance_generation(&self) { + self.generation.fetch_add(1, Ordering::Relaxed); + } +} + +#[repr(align(16))] +struct ConcurrentEntry { + high_hash: AtomicU32, + value: Evaluation, + depth: u8, + flag: EntryFlag, + generation: u8, + best_move: Option, +} + +pub(super) struct LockfreeTable { + table: Vec>, + mask: usize, + generation: AtomicU8, +} + +// Safe for cross-thread usage because of manual concurrency operations. 
+unsafe impl Sync for LockfreeTable {} + +impl Table for LockfreeTable { + fn lookup(&self, hash: u64) -> Option> { + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + let table_hash = entry.high_hash.load(Ordering::Acquire); + if high_bits(hash) | 1 == table_hash | 1 { + // Copy contents + let ret = Some(Entry { + // No one reads the hash. + high_hash: 0, + value: entry.value, + depth: entry.depth, + flag: entry.flag, + generation: entry.generation, + best_move: entry.best_move, + }); + // Verify the hash hasn't changed during the copy. + if table_hash == entry.high_hash.load(Ordering::SeqCst) { + return ret; + } + } + None + } + + fn store(&mut self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M) { + self.concurrent_store(hash, value, depth, flag, best_move) + } + fn advance_generation(&mut self) { + self.concurrent_advance_generation() + } +} + +#[allow(dead_code)] +impl LockfreeTable { + const WRITING_SENTINEL: u32 = 0xffff_ffff; + + pub(super) fn new(table_byte_size: usize) -> Self { + let size = + (table_byte_size / std::mem::size_of::>()).next_power_of_two(); + let mask = size - 1; + let mut table = Vec::with_capacity(size); + for _ in 0..size { + table.push(ConcurrentEntry:: { + high_hash: AtomicU32::new(0x5555_5555), + value: 0, + depth: 0, + flag: EntryFlag::Exact, + generation: 0, + best_move: None, + }); + } + Self { table, mask, generation: AtomicU8::new(0) } + } +} + +impl ConcurrentTable for LockfreeTable { + fn concurrent_store( + &self, hash: u64, value: Evaluation, depth: u8, flag: EntryFlag, best_move: M, + ) { + let table_gen = self.generation.load(Ordering::Relaxed); + let index = (hash as usize) & self.mask; + let entry = &self.table[index]; + // TODO: some not-totally racy reads of generation and depth + if entry.generation != table_gen || entry.depth <= depth { + // Set hash to sentinel value during write. 
+ let x = entry.high_hash.load(Ordering::Acquire); + if x == Self::WRITING_SENTINEL { + // Someone's already writing, just forget it. + return; + } + // Try to set to sentinel value: + if entry + .high_hash + .compare_exchange_weak( + x, + Self::WRITING_SENTINEL, + Ordering::Acquire, + Ordering::Relaxed, + ) + .is_err() + { + // Someone just started writing, just forget it. + return; + } + + // concurrent_lookup will throw out any read that occurs across a write. + #[allow(mutable_transmutes)] + let entry = unsafe { + std::mem::transmute::<&ConcurrentEntry, &mut ConcurrentEntry>(entry) + }; + entry.value = value; + entry.depth = depth; + entry.flag = flag; + entry.generation = table_gen; + entry.best_move = Some(best_move); + + // Set hash to correct value to indicate done. + let new_hash = if high_bits(hash) | 1 == x | 1 { + // If we're overwriting the same hash, flip the lowest bit to + // catch any readers reading across this change. + x ^ 1 + } else { + high_bits(hash) + }; + entry.high_hash.store(new_hash, Ordering::Release); + } + } + + fn concurrent_advance_generation(&self) { + self.generation.fetch_add(1, Ordering::Relaxed); + } +} + +// A single-threaded utility to find moves that have done well in other branches. +pub(super) struct CounterMoves { + countermove_enabled: bool, + history_enabled: bool, + // For a given move index, which followup most recently led to a beta cutoff? + countermove_table: Vec, + // For each move index, how many beta cutoffs has it produced? + history_table: Vec, +} + +impl CounterMoves +where + G::M: Eq + Copy, +{ + pub(super) fn new(countermove_enabled: bool, history_enabled: bool) -> Self { + Self { + countermove_enabled, + history_enabled, + countermove_table: Vec::new(), + history_table: Vec::new(), + } + } + + pub(super) fn reorder(&self, prev: Option, moves: &mut [G::M]) { + if !self.history_table.is_empty() { + // Stable sort to preserve previous orderings. 
+ moves.sort_by_key(|&m| !self.history_table[G::table_index(m) as usize]); + } + if let Some(prev) = prev { + if let Some(response) = self.countermove_table.get(G::table_index(prev) as usize) { + move_to_front(*response, moves); + } + } + } + + pub(super) fn update(&mut self, prev: Option, m: G::M) { + if let Some(prev) = prev { + if let Some(entry) = self.countermove_table.get_mut(G::table_index(prev) as usize) { + *entry = m; + } + } + if let Some(entry) = self.history_table.get_mut(G::table_index(m) as usize) { + *entry = 1u32.saturating_add(*entry); + } + } + + pub(super) fn advance_generation(&mut self, null_move: Option) { + // Lazily allocate tables + if self.countermove_enabled && self.countermove_table.is_empty() { + if let Some(m) = null_move { + self.countermove_table = vec![m; G::max_table_index() as usize + 1]; + } + } + if self.history_enabled && self.history_table.is_empty() { + self.history_table = vec![0; G::max_table_index() as usize + 1]; + } + + // Partially degrade old values, to bias towards new data. + self.history_table.iter_mut().for_each(|n| *n >>= 3); + } +} diff --git a/src/strategies/ybw.rs b/src/strategies/ybw.rs new file mode 100644 index 0000000..e29be18 --- /dev/null +++ b/src/strategies/ybw.rs @@ -0,0 +1,535 @@ +//! An implementation of iterative deeping, with each iteration executed in parallel. +//! +//! This implementation evaluates the best guess at each move first, then +//! parallelizes all other moves using rayon. +//! +//! This is based on the Young Brothers Wait Concept and CilkChess. 
+ +extern crate rayon; + +use super::super::interface::*; +use super::super::util::*; +use super::common::*; +use super::iterative::{IterativeOptions, Stats}; +use super::sync_util::{par_iter_in_order, timeout_signal, CachePadded, ThreadLocal}; +use super::table::*; + +use rayon::prelude::*; +use std::cmp::max; +use std::sync::atomic::{AtomicBool, AtomicI16, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +/// Options to use for the parallel search engine. +#[derive(Clone, Copy)] +pub struct ParallelOptions { + pub num_threads: Option, + serial_cutoff_depth: u8, + pub background_pondering: bool, +} + +impl ParallelOptions { + pub fn new() -> Self { + ParallelOptions { num_threads: None, serial_cutoff_depth: 1, background_pondering: false } + } +} + +impl Default for ParallelOptions { + fn default() -> Self { + Self::new() + } +} + +impl ParallelOptions { + /// Set the total number of threads to use. Otherwise defaults to num_cpus. + pub fn with_num_threads(mut self, num_threads: usize) -> Self { + self.num_threads = Some(num_threads); + self + } + + /// At what depth should we stop trying to parallelize and just run serially. + pub fn with_serial_cutoff_depth(mut self, depth: u8) -> Self { + self.serial_cutoff_depth = depth; + self + } + + /// Continuing processing during opponent's move. 
+ pub fn with_background_pondering(mut self) -> Self { + self.background_pondering = true; + self + } + + pub fn num_threads(self) -> usize { + self.num_threads.unwrap_or_else(num_cpus::get) + } +} + +struct ParallelNegamaxer { + table: Arc::M>>, + eval: E, + opts: IterativeOptions, + par_opts: ParallelOptions, + timeout: Arc, + stats: ThreadLocal>, + move_pool: ThreadLocal::M>>, + countermoves: ThreadLocal>, + pv: Mutex::M>>, +} + +impl ParallelNegamaxer +where + ::S: Clone + Send + Sync, + ::M: Copy + Eq + Send + Sync, + E: Clone + Sync + Send + 'static, +{ + fn new( + opts: IterativeOptions, par_opts: ParallelOptions, eval: E, + table: Arc::M>>, timeout: Arc, + thread_pool: &rayon::ThreadPool, + ) -> Self { + Self { + table, + eval, + opts, + par_opts, + timeout, + stats: ThreadLocal::new(CachePadded::default, thread_pool), + move_pool: ThreadLocal::new(MovePool::default, thread_pool), + countermoves: ThreadLocal::new( + || CounterMoves::new(opts.countermove_table, opts.countermove_history_table), + thread_pool, + ), + pv: Mutex::new(Vec::new()), + } + } + + fn principal_variation(&self) -> Vec<::M> { + self.pv.lock().unwrap().clone() + } + + fn null_move_check( + &self, s: &mut ::S, depth: u8, beta: Evaluation, + ) -> Option { + if let (Some(depth_reduction), Some(null_move)) = + (self.opts.null_move_depth, E::G::null_move(s)) + { + // Default to a minimum of depth=1 after null moving. + if depth > depth_reduction && + // If the position already seems pretty awesome. + self.eval.evaluate(s) >= beta + { + // If we just pass and let the opponent play this position (at reduced depth), + let mut nulled = AppliedMove::::new(s, null_move); + let value = + -self.negamax(&mut nulled, None, depth - depth_reduction, -beta, -beta + 1)?; + // is the result still so good that we shouldn't bother with a full search? + if value >= beta { + return Some(value); + } + } + } + // If we didn't check, return a low value that won't trigger beta cutoff. 
+ Some(WORST_EVAL) + } + + // Negamax only among noisy moves. + fn noisy_negamax( + &self, s: &mut ::S, depth: u8, mut alpha: Evaluation, beta: Evaluation, + ) -> Option { + if self.timeout.load(Ordering::Relaxed) { + return None; + } + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + if depth == 0 { + return Some(self.eval.evaluate(s)); + } + + let mut moves = Vec::new(); + self.move_pool.local_do(|pool| moves = pool.alloc()); + self.eval.generate_noisy_moves(s, &mut moves); + if moves.is_empty() { + self.move_pool.local_do(|pool| pool.free(moves)); + return Some(self.eval.evaluate(s)); + } + + let mut best = WORST_EVAL; + for &m in moves.iter() { + let mut new = AppliedMove::::new(s, m); + let value = -self.noisy_negamax(&mut new, depth - 1, -beta, -alpha)?; + best = max(best, value); + alpha = max(alpha, value); + if alpha >= beta { + break; + } + } + self.move_pool.local_do(|pool| pool.free(moves)); + Some(best) + } + + // Recursively compute negamax on the game state. Returns None if it hits the timeout. + fn negamax( + &self, s: &mut ::S, prev_move: Option<::M>, depth: u8, + mut alpha: Evaluation, mut beta: Evaluation, + ) -> Option + where + ::S: Clone + Send + Sync, + ::M: Copy + Eq + Send + Sync, + E: Sync, + { + if self.timeout.load(Ordering::Relaxed) { + return None; + } + + self.stats.local_do(|stats| stats.explore_node()); + + if depth == 0 { + // Evaluate quiescence search on leaf nodes. + // Will just return the node's evaluation if quiescence search is disabled. + return self.noisy_negamax(s, self.opts.max_quiescence_depth, alpha, beta); + } + if let Some(winner) = E::G::get_winner(s) { + return Some(winner.evaluate()); + } + + let alpha_orig = alpha; + let hash = E::G::zobrist_hash(s); + let mut good_move = None; + if let Some(value) = self.table.check(hash, depth, &mut good_move, &mut alpha, &mut beta) { + return Some(value); + } + + if self.null_move_check(s, depth, beta)? 
>= beta { + return Some(beta); + } + + let mut moves = Vec::new(); + self.move_pool.local_do(|pool| moves = pool.alloc()); + E::G::generate_moves(s, &mut moves); + self.stats.local_do(|stats| stats.generate_moves(moves.len())); + if moves.is_empty() { + self.move_pool.local_do(|pool| pool.free(moves)); + return Some(WORST_EVAL); + } + + // Reorder moves. + if depth >= self.opts.min_reorder_moves_depth { + // TODO: reorder moves + } + self.countermoves.local_do(|cm| cm.reorder(prev_move, &mut moves)); + if let Some(good) = good_move { + move_to_front(good, &mut moves); + } + + let first_move = moves[0]; + + // Evaluate first move serially. + let initial_value = { + let mut new = AppliedMove::::new(s, first_move); + -self.negamax(&mut new, Some(first_move), depth - 1, -beta, -alpha)? + }; + alpha = max(alpha, initial_value); + let (best, best_move) = if alpha >= beta { + // Skip search + (initial_value, first_move) + } else if self.par_opts.serial_cutoff_depth >= depth { + // Serial search + let mut best = initial_value; + let mut best_move = first_move; + let mut null_window = false; + for &m in moves[1..].iter() { + let mut new = AppliedMove::::new(s, m); + let value = if null_window { + let probe = -self.negamax(&mut new, Some(m), depth - 1, -alpha - 1, -alpha)?; + if probe > alpha && probe < beta { + // Full search fallback. + -self.negamax(&mut new, Some(m), depth - 1, -beta, -probe)? + } else { + probe + } + } else { + -self.negamax(&mut new, Some(m), depth - 1, -beta, -alpha)? + }; + if value > best { + best = value; + best_move = m; + } + if value > alpha { + alpha = value; + // Now that we've found a good move, assume following moves + // are worse, and seek to cull them without full evaluation. 
+ null_window = self.opts.null_window_search; + } + if alpha >= beta { + self.countermoves.local_do(|cm| cm.update(prev_move, m)); + break; + } + } + (best, best_move) + } else { + let alpha = AtomicI16::new(alpha); + let best_move = Mutex::new(ValueMove::new(initial_value, first_move)); + // Parallel search + let result = par_iter_in_order(&moves[1..]).try_for_each(|&m| -> Option<()> { + // Check to see if we're cancelled by another branch. + let initial_alpha = alpha.load(Ordering::SeqCst); + if initial_alpha >= beta { + return None; + } + + let mut state = s.clone(); + let mut new = AppliedMove::::new(&mut state, m); + let value = if self.opts.null_window_search && initial_alpha > alpha_orig { + // TODO: send reference to alpha as neg_beta to children. + let probe = -self.negamax( + &mut new, + Some(m), + depth - 1, + -initial_alpha - 1, + -initial_alpha, + )?; + if probe > initial_alpha && probe < beta { + // Check again that we're not cancelled. + if alpha.load(Ordering::SeqCst) >= beta { + return None; + } + // Full search fallback. + -self.negamax(&mut new, Some(m), depth - 1, -beta, -probe)? + } else { + probe + } + } else { + -self.negamax(&mut new, Some(m), depth - 1, -beta, -initial_alpha)? + }; + + alpha.fetch_max(value, Ordering::SeqCst); + let mut bests = best_move.lock().unwrap(); + bests.max(value, m); + Some(()) + }); + if result.is_none() { + // Check for timeout. 
+ if self.timeout.load(Ordering::Relaxed) { + return None; + } + } + best_move.into_inner().unwrap().into_inner() + }; + + self.table.concurrent_update(hash, alpha_orig, beta, depth, best, best_move); + self.move_pool.local_do(|pool| pool.free(moves)); + Some(clamp_value(best)) + } + + fn iterative_search( + &self, mut state: ::S, max_depth: u8, background: bool, + ) -> Option<(::M, Evaluation, u8)> { + self.table.concurrent_advance_generation(); + let root_hash = E::G::zobrist_hash(&state); + let mut best_move = None; + let mut best_value = 0; + let mut interval_start; + + let mut depth = max_depth % self.opts.step_increment; + if depth == 0 { + depth = self.opts.step_increment; + } + while depth <= max_depth { + interval_start = Instant::now(); + if self.negamax(&mut state, None, depth, WORST_EVAL, BEST_EVAL).is_none() { + // Timeout. Return the best move from the previous depth. + break; + } + let entry = match self.table.lookup(root_hash) { + Some(entry) => entry, + None => { + if background { + // Main tasks overwrote our result, just bail early. 
+ return None; + } else { + panic!("Probably some race condition ate the best entry."); + } + } + }; + + best_move = entry.best_move; + best_value = entry.value; + + if self.opts.verbose && !background { + let interval = Instant::now() - interval_start; + let mut stats = Stats::default(); + self.stats.do_all(|s| stats.add(s)); + let mbf = stats.total_generated_moves as f64 / stats.total_generate_move_calls as f64; + let ebf = (stats.nodes_explored as f64).powf(((depth as f64) + 1.0).recip()); + let nps = stats.nodes_explored as f64 / interval.as_secs_f64(); + let count = stats.nodes_explored; + eprintln!( + "Parallel (threads={}) depth={:>2}, took={:>6}ms; returned{:>5}; bestmove {}; MBF={mbf:>6.1} EBF={ebf:>6.1}; NPS={nps:>9.0}; total={count:>11}", + self.par_opts.num_threads(), + depth, + interval.as_millis(), + entry.value_string(), + move_id::(&state, best_move) + ); + } + + depth += self.opts.step_increment; + let mut pv_moves = Vec::new(); + self.table.populate_pv::(&mut pv_moves, &state); + self.pv.lock().unwrap().clone_from(&pv_moves); + if unclamp_value(entry.value).abs() == BEST_EVAL { + break; + } + } + best_move.map(|m| (m, best_value, depth)) + } +} + +pub struct ParallelSearch { + max_depth: u8, + max_time: Duration, + + background_cancel: Arc, + table: Arc::M>>, + prev_value: Evaluation, + principal_variation: Vec<::M>, + eval: E, + + thread_pool: rayon::ThreadPool, + + opts: IterativeOptions, + par_opts: ParallelOptions, +} + +impl ParallelSearch { + pub fn new(eval: E, opts: IterativeOptions, par_opts: ParallelOptions) -> ParallelSearch { + let table = Arc::new(LockfreeTable::new(opts.table_byte_size)); + let num_threads = par_opts.num_threads(); + let pool_builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads); + ParallelSearch { + max_depth: 99, + max_time: Duration::from_secs(5), + background_cancel: Arc::new(AtomicBool::new(false)), + table, + prev_value: 0, + principal_variation: Vec::new(), + thread_pool: 
pool_builder.build().unwrap(), + opts, + par_opts, + eval, + } + } + + #[doc(hidden)] + pub fn root_value(&self) -> Evaluation { + unclamp_value(self.prev_value) + } + + fn pretty_stats(&self, stats: &Stats, start: Instant, minimax: &ParallelNegamaxer, depth: u8) -> String { + let interval = Instant::now() - start; + let mbf = + stats.total_generated_moves as f64 / stats.total_generate_move_calls as f64; + let ebf = + (stats.nodes_explored as f64).powf((depth as f64 + 1.0).recip()); + let nps = (stats.nodes_explored) as f64 / interval.as_secs_f64(); + let count = stats.nodes_explored; + format!( + "Parallel (threads={}) depth={:>2}, took={:>6.0}ms; MBF={mbf:>6.1} EBF={ebf:>6.1}; NPS={nps:>9.0}; total={count:>11}", + minimax.par_opts.num_threads(), + depth, + interval.as_secs_f64()*1000.0, + ) + } +} + +impl Strategy for ParallelSearch +where + ::S: Clone + Send + Sync, + ::M: Copy + Eq + Send + Sync, + E: Clone + Sync + Send + 'static, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + if E::G::get_winner(s).is_some() { + return None; + } + // Cancel any ongoing background processing. + self.background_cancel.store(true, Ordering::Relaxed); + // Start timer if configured. + let timeout = if self.max_time == Duration::new(0, 0) { + Arc::new(AtomicBool::new(false)) + } else { + timeout_signal(self.max_time) + }; + + let (best_move, value) = { + let start_time = Instant::now(); + let mut negamaxer = ParallelNegamaxer::new( + self.opts, + self.par_opts, + self.eval.clone(), + self.table.clone(), + timeout, + &self.thread_pool, + ); + // Launch in threadpool and wait for result. 
+ let value_move_depth = self + .thread_pool + .install(|| negamaxer.iterative_search(s.clone(), self.max_depth, false)); + self.principal_variation = negamaxer.principal_variation(); + let mut stats = Stats::default(); + negamaxer.stats.do_all_mut(|local| stats.add(local)); + if self.opts.verbose { + eprintln!("{}", "——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————"); + eprintln!("{}", self.pretty_stats(&stats, start_time, &negamaxer, value_move_depth.map_or(0, |v| v.2))); + eprintln!("principal variation: {}", pv_string::(&self.principal_variation(), s)); + } + let value_move = value_move_depth.map(|v| (v.0, v.1)); + value_move + }?; + self.prev_value = value; + + if self.par_opts.background_pondering { + self.background_cancel = Arc::new(AtomicBool::new(false)); + // Create a separate negamaxer to have a dedicated cancel + // signal, and to allow the negamaxer to outlive this scope. + let negamaxer = ParallelNegamaxer::new( + self.opts, + self.par_opts, + self.eval.clone(), + self.table.clone(), + self.background_cancel.clone(), + &self.thread_pool, + ); + let mut state = s.clone(); + if let Some(new_state) = E::G::apply(&mut state, best_move) { + state = new_state; + } + // Launch in threadpool asynchronously. 
+ self.thread_pool.spawn(move || { + negamaxer.iterative_search(state, 99, true); + }); + } + Some(best_move) + } + + fn set_timeout(&mut self, max_time: Duration) { + self.max_time = max_time; + self.max_depth = 99; + } + + fn set_max_depth(&mut self, depth: u8) { + self.max_depth = depth; + self.max_time = Duration::new(0, 0); + } + + fn principal_variation(&self) -> Vec<::M> { + self.principal_variation.clone() + } +} + +impl Drop for ParallelSearch { + fn drop(&mut self) { + self.background_cancel.store(true, Ordering::Relaxed); + } +} diff --git a/src/util.rs b/src/util.rs index 2e44993..11f19cd 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,32 +1,199 @@ //! Utility functions for testing, and tests. +#[cfg(not(target_arch = "wasm32"))] +extern crate rayon; + use super::interface; -use super::interface::Move; +use super::interface::Game; + +#[cfg(not(target_arch = "wasm32"))] +use rayon::prelude::*; use std::default::Default; +use std::time::Instant; + +pub(crate) struct AppliedMove<'a, G: Game> { + old: &'a mut ::S, + new: Option<::S>, + m: ::M, +} + +impl std::ops::Deref for AppliedMove<'_, G> { + type Target = ::S; + fn deref(&self) -> &::S { + self.new.as_ref().unwrap_or(self.old) + } +} + +impl std::ops::DerefMut for AppliedMove<'_, G> { + fn deref_mut(&mut self) -> &mut ::S { + self.new.as_mut().unwrap_or(self.old) + } +} + +impl Drop for AppliedMove<'_, G> { + fn drop(&mut self) { + ::undo(self.old, self.m) + } +} + +impl<'a, G: Game> AppliedMove<'a, G> { + pub(crate) fn new(old: &'a mut ::S, m: ::M) -> Self { + let new = G::apply(old, m); + AppliedMove { old, new, m } + } +} /// Play a complete, new game with players using the two provided strategies. /// -/// The first strategy will be `Player::Computer`, the other `Player::Opponent`. -/// Returns result of the game. 
-pub fn battle_royale(s1: &mut S1, s2: &mut S2) -> interface::Winner - where G: interface::Game, - G::S: Default, - S1: interface::Strategy, - S2: interface::Strategy +/// Returns `None` if the game ends in a draw, or `Some(0)`, `Some(1)` if the +/// first or second strategy won, respectively. +pub fn battle_royale(s1: &mut S1, s2: &mut S2) -> Option +where + G: interface::Game, + G::S: Default, + S1: interface::Strategy, + S2: interface::Strategy, { let mut state = G::S::default(); - let mut strategies: Vec<(interface::Player, &mut interface::Strategy)> = vec![ - (interface::Player::Computer, s1), - (interface::Player::Opponent, s2), - ]; + let mut strategies: [&mut dyn interface::Strategy; 2] = [s1, s2]; let mut s = 0; - while G::get_winner(&state).is_none() { - let (p, ref mut strategy) = strategies[s]; - match strategy.choose_move(&mut state, p) { - Some(m) => m.apply(&mut state), - None => break, + loop { + if let Some(winner) = G::get_winner(&state) { + return match winner { + interface::Winner::Draw => None, + interface::Winner::PlayerJustMoved => Some(1 - s), + interface::Winner::PlayerToMove => Some(s), + }; + } + let strategy = &mut strategies[s]; + match strategy.choose_move(&state) { + Some(m) => { + if let Some(new_state) = G::apply(&mut state, m) { + state = new_state; + } + } + None => return None, } s = 1 - s; } - G::get_winner(&state).unwrap() +} + +pub(crate) struct MovePool { + pool: Vec>, +} + +impl Default for MovePool { + fn default() -> Self { + Self { pool: Vec::new() } + } +} + +impl MovePool { + pub(crate) fn alloc(&mut self) -> Vec { + self.pool.pop().unwrap_or_default() + } + + pub(crate) fn free(&mut self, mut vec: Vec) { + vec.clear(); + self.pool.push(vec); + } +} + +#[cfg(not(target_arch = "wasm32"))] +fn perft_recurse( + pool: &mut MovePool, state: &mut G::S, depth: u8, single_thread_cutoff: u8, +) -> u64 +where + ::S: Clone + Sync, + ::M: Copy + Sync, +{ + if depth == 0 { + return 1; + } + if G::get_winner(state).is_some() { + // 
Apparently perft rules only count positions at the target depth. + return 0; + } + let mut moves = pool.alloc(); + G::generate_moves(state, &mut moves); + let n = if depth == 1 { + moves.len() as u64 + } else if depth <= single_thread_cutoff { + // Single-thread recurse. + let mut count = 0; + for &m in moves.iter() { + let mut new = AppliedMove::::new(state, m); + count += perft_recurse::(pool, &mut new, depth - 1, single_thread_cutoff); + } + count + } else { + // Multi-thread recurse. + moves + .par_iter() + .with_max_len(1) + .map(|m| { + let mut state = state.clone(); + let mut pool2 = MovePool::::default(); + if let Some(new_state) = G::apply(&mut state, *m) { + state = new_state; + } + perft_recurse::(&mut pool2, &mut state, depth - 1, single_thread_cutoff) + }) + .sum() + }; + pool.free(moves); + n +} + +#[cfg(target_arch = "wasm32")] +fn perft_recurse( + pool: &mut MovePool, state: &mut G::S, depth: u8, single_thread_cutoff: u8, +) -> u64 +where + ::S: Clone + Sync, + ::M: Copy + Sync, +{ + if depth == 0 { + return 1; + } + if G::get_winner(state).is_some() { + // Apparently perft rules only count positions at the target depth. + return 0; + } + let mut moves = pool.alloc(); + G::generate_moves(state, &mut moves); + let n = if depth == 1 { + moves.len() as u64 + } else { + // Single-thread recurse. 
+ let mut count = 0; + for &m in moves.iter() { + let mut new = AppliedMove::::new(state, m); + count += perft_recurse::(pool, &mut new, depth - 1, single_thread_cutoff); + } + count + }; + pool.free(moves); + n +} + +pub fn perft(state: &mut ::S, max_depth: u8, multi_threaded: bool) -> Vec +where + ::S: Clone + Sync, + ::M: Copy + Sync, +{ + println!("depth count time kn/s"); + let mut pool = MovePool::::default(); + let mut counts = Vec::new(); + let single_thread_cutoff = if multi_threaded { 3 } else { max_depth }; + for depth in 0..max_depth + 1 { + let start = Instant::now(); + let count = perft_recurse::(&mut pool, state, depth, single_thread_cutoff); + let dur = start.elapsed(); + let rate = count as f64 / dur.as_secs_f64() / 1000.0; + let time = format!("{:.1?}", dur); + println!("{:>5} {:>15} {:>11} {:>11.1}", depth, count, time, rate); + counts.push(count); + } + counts } diff --git a/tests/strategies.rs b/tests/strategies.rs new file mode 100644 index 0000000..e64de1b --- /dev/null +++ b/tests/strategies.rs @@ -0,0 +1,258 @@ +// For a given Evaluator and depth, all negamax Strategies should produce the same +// value for the root. They use different techniques and pruning heuristics +// for speed, but it's all fundamentally the minimax algorithm. This file +// creates fake evaluation trees of connect four, and ensures that all +// Strategies (including a plain negamax without alpha-beta) get the same answer. + +extern crate minimax; +extern crate rand; +#[path = "../examples/connect4.rs"] +mod connect4; + +use minimax::*; +use rand::prelude::IndexedRandom; +use std::cmp::max; +use std::collections::hash_map::DefaultHasher; +use std::hash::Hasher; + +pub struct PlainNegamax { + depth: u8, + root_value: Evaluation, + // All moves tied with the best valuation. 
+ best_moves: Vec<::M>, + eval: E, +} + +impl PlainNegamax { + pub fn new(eval: E, depth: u8) -> PlainNegamax { + PlainNegamax { depth: depth, root_value: 0, best_moves: Vec::new(), eval } + } + + fn negamax(&self, s: &mut ::S, depth: u8) -> Evaluation + where + <::G as Game>::M: Copy, + { + if let Some(winner) = E::G::get_winner(s) { + return winner.evaluate(); + } + if depth == 0 { + return self.eval.evaluate(s); + } + let mut moves = Vec::new(); + E::G::generate_moves(s, &mut moves); + let mut best = WORST_EVAL; + for &m in moves.iter() { + let mut new = E::G::apply(s, m).unwrap(); + let value = -self.negamax(&mut new, depth - 1); + best = max(best, value); + } + best + } +} + +impl Strategy for PlainNegamax +where + ::S: Clone, + ::M: Copy, +{ + fn choose_move(&mut self, s: &::S) -> Option<::M> { + let mut moves = Vec::new(); + E::G::generate_moves(s, &mut moves); + + self.best_moves.clear(); + let mut best_value = WORST_EVAL; + let mut s = s.clone(); + for &m in moves.iter() { + let mut new = E::G::apply(&mut s, m).unwrap(); + let value = -self.negamax(&mut new, self.depth - 1); + if value == best_value { + self.best_moves.push(m); + } else if value > best_value { + best_value = value; + self.best_moves.clear(); + self.best_moves.push(m); + } + } + self.root_value = best_value; + self.best_moves.first().map(|m| *m) + } +} + +#[derive(Clone)] +struct RandomEvaluator; + +impl Default for RandomEvaluator { + fn default() -> Self { + Self {} + } +} + +impl minimax::Evaluator for RandomEvaluator { + type G = connect4::Game; + fn evaluate(&self, b: &connect4::Board) -> minimax::Evaluation { + // Hash the game state to get a deterministically random Evaluation. + let mut hasher = DefaultHasher::new(); + hasher.write_u64(b.pieces_just_moved()); + hasher.write_u64(b.pieces_to_move); + let hash = hasher.finish(); + // Use fewer bits so that we get some equal values. 
+ (hash as minimax::Evaluation) >> 9 + } +} + +fn generate_random_state(depth: u8) -> connect4::Board { + let mut rng = rand::rng(); + let mut b = connect4::Board::default(); + for _ in 0..depth { + let mut moves = Vec::new(); + connect4::Game::generate_moves(&b, &mut moves); + let m = moves.choose(&mut rng).unwrap(); + let next = connect4::Game::apply(&mut b, *m).unwrap(); + if connect4::Game::get_winner(&next).is_some() { + // Oops, undo and try again on the next iter. + } else { + b = next; + } + } + b +} + +#[test] +fn test_winning_position() { + let mut b = connect4::Board::default(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 3 }).unwrap(); + b = connect4::Game::apply(&mut b, connect4::Place { col: 2 }).unwrap(); + assert_eq!(Some(Winner::PlayerJustMoved), connect4::Game::get_winner(&b)); + + // Make sure none of the strategies die when given a winning position. 
+ assert_eq!(None, Negamax::new(RandomEvaluator, 4).choose_move(&b)); + + let opt = IterativeOptions::new(); + assert_eq!(None, IterativeSearch::new(RandomEvaluator::default(), opt).choose_move(&b)); + assert_eq!( + None, + ParallelSearch::new(RandomEvaluator::default(), opt, ParallelOptions::default()) + .choose_move(&b) + ); +} + +#[test] +fn compare_plain_negamax() { + for _ in 0..100 { + for max_depth in 1..6 { + let b = generate_random_state(10); + + let mut plain_negamax = PlainNegamax::new(RandomEvaluator::default(), max_depth); + plain_negamax.choose_move(&b); + let value = plain_negamax.root_value; + + let mut negamax = Negamax::new(RandomEvaluator, max_depth); + let negamax_move = negamax.choose_move(&b).unwrap(); + let negamax_value = negamax.root_value(); + assert_eq!(value, negamax_value, "search depth={}\n{}", max_depth, b); + assert!( + plain_negamax.best_moves.contains(&negamax_move), + "bad move={:?}\nsearch depth={}\n{}", + negamax_move, + max_depth, + b + ); + + // Sampling of the configuration space. + for (option_num, opt) in vec![ + IterativeOptions::new() + .with_replacement_strategy(Replacement::DepthPreferred) + .with_null_window_search(false), + IterativeOptions::new() + .with_replacement_strategy(Replacement::Always) + .with_double_step_increment(), + IterativeOptions::new() + .with_replacement_strategy(Replacement::TwoTier) + .with_aspiration_window(5), + IterativeOptions::new().with_replacement_strategy(Replacement::TwoTier).with_mtdf(), + ] + .drain(..) 
+ .enumerate() + { + let mut iterative = IterativeSearch::new( + RandomEvaluator::default(), + opt.with_table_byte_size(64000), + ); + iterative.set_max_depth(max_depth); + let iterative_move = iterative.choose_move(&b).unwrap(); + let iterative_value = iterative.root_value(); + assert_eq!( + value, iterative_value, + "search depth={}, option={}\n{}", + max_depth, option_num, b + ); + assert!( + plain_negamax.best_moves.contains(&iterative_move), + "bad move={:?}\nsearch depth={}\n{}", + iterative_move, + max_depth, + b + ); + } + + let opt = IterativeOptions::new().with_table_byte_size(64000); + let mut parallel = + ParallelSearch::new(RandomEvaluator::default(), opt, ParallelOptions::default()); + parallel.set_max_depth(max_depth); + let par_move = parallel.choose_move(&b).unwrap(); + let par_value = parallel.root_value(); + assert_eq!(value, par_value, "search depth={}\n{}", max_depth, b); + assert!( + plain_negamax.best_moves.contains(&par_move), + "bad move={:?}\nsearch depth={}\n{}", + par_move, + max_depth, + b + ); + } + } +} + +// The same test, but a deeper tree (without plain negamax) to try to expose +// more parallelism in the parallel strategies. 
+#[test] +fn compare_deep_negamax() { + let opt = IterativeOptions::new() + .with_table_byte_size(64000) + .with_countermoves() + .with_countermove_history(); + for iter in 0..10 { + for max_depth in 1..10 { + let b = generate_random_state(10); + + let mut negamax = Negamax::new(RandomEvaluator, max_depth); + negamax.choose_move(&b).unwrap(); + let value = negamax.root_value(); + + let mut iterative = IterativeSearch::new(RandomEvaluator::default(), opt); + iterative.set_max_depth(max_depth); + iterative.choose_move(&b).unwrap(); + let iterative_value = iterative.root_value(); + assert_eq!(value, iterative_value, "search depth={}\n{}", max_depth, b); + + let mut mtdf = + IterativeSearch::new(RandomEvaluator::default(), opt.clone().with_mtdf()); + mtdf.set_max_depth(max_depth); + mtdf.choose_move(&b).unwrap(); + let mtdf_value = mtdf.root_value(); + assert_eq!(value, mtdf_value, "search depth={}\n{}", max_depth, b); + + let mut parallel = + ParallelSearch::new(RandomEvaluator::default(), opt, ParallelOptions::default()); + parallel.set_max_depth(max_depth); + parallel.choose_move(&b).unwrap(); + let parallel_value = parallel.root_value(); + assert_eq!(value, parallel_value, "search iter={} depth={}\n{}", iter, max_depth, b); + } + } +} diff --git a/tests/ttt.rs b/tests/ttt.rs index 796677e..9d93ffd 100644 --- a/tests/ttt.rs +++ b/tests/ttt.rs @@ -1,18 +1,18 @@ extern crate minimax; -#[path="../examples/ttt.rs"] +#[path = "../examples/ttt.rs"] mod ttt; use minimax::util::battle_royale; +use minimax::{MCTSOptions, MonteCarloTreeSearch, Negamax, Random}; // Ensure that two players using negamax always results in a draw. 
#[test] fn test_ttt_negamax_always_draws() { - use minimax::strategies::negamax::{Negamax, Options}; - let mut s1 = Negamax::::new(Options { max_depth: 10 }); - let mut s2 = Negamax::::new(Options { max_depth: 10 }); + let mut s1 = Negamax::new(ttt::Evaluator::default(), 10); + let mut s2 = Negamax::new(ttt::Evaluator::default(), 10); for _ in 0..100 { - assert!(battle_royale(&mut s1, &mut s2) == minimax::Winner::Draw) + assert_eq!(battle_royale(&mut s1, &mut s2), None); } } @@ -20,12 +20,19 @@ fn test_ttt_negamax_always_draws() { // either a draw or a win for the former player. #[test] fn test_ttt_negamax_vs_random_always_wins_or_draws() { - use minimax::strategies::negamax::{Negamax, Options}; - use minimax::strategies::random::Random; - let mut s1 = Negamax::::new(Options { max_depth: 10 }); + let mut s1 = Negamax::new(ttt::Evaluator::default(), 10); let mut s2 = Random::new(); for _ in 0..100 { - assert!(battle_royale(&mut s1, &mut s2) != - minimax::Winner::Competitor(minimax::Player::Opponent)) + assert_ne!(battle_royale(&mut s1, &mut s2), Some(1)); + } +} + +#[test] +fn test_ttt_mcts_vs_random_always_wins_or_draws() { + let mut s1 = MonteCarloTreeSearch::new(MCTSOptions::default().with_num_threads(1)); + s1.set_max_rollouts(100); + let mut s2 = Random::new(); + for _ in 0..50 { + assert_ne!(battle_royale::(&mut s1, &mut s2), Some(1)); } }