Skip to content

Commit 6db51b4

Browse files
authored
Use Toxiproxy for failover testing (#44)
* Toxiproxy * up-to-date config * debug * hm * more * mroe * more * hmm * aha * less logs * cleaner * hmm * we test these now * update readme
1 parent a784883 commit 6db51b4

File tree

4 files changed

+158
-17
lines changed

4 files changed

+158
-17
lines changed

.circleci/pgcat.toml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#
2+
# PgCat config example.
3+
#
4+
5+
#
6+
# General pooler settings
7+
[general]
8+
9+
# What IP to run on, 0.0.0.0 means accessible from everywhere.
10+
host = "0.0.0.0"
11+
12+
# Port to run on, same as PgBouncer used in this example.
13+
port = 6432
14+
15+
# How many connections to allocate per server.
16+
pool_size = 15
17+
18+
# Pool mode (see PgBouncer docs for more).
19+
# session: one server connection per connected client
20+
# transaction: one server connection per client transaction
21+
pool_mode = "transaction"
22+
23+
# How long to wait before aborting a server connection (ms).
24+
connect_timeout = 100
25+
26+
# How much time to give the `SELECT 1` health check query to return a result (ms).
27+
healthcheck_timeout = 100
28+
29+
# For how long to ban a server if it fails a health check (seconds).
30+
ban_time = 60 # Seconds
31+
32+
# Stats will be sent here
33+
statsd_address = "127.0.0.1:8125"
34+
35+
#
36+
# User to use for authentication against the server.
37+
[user]
38+
name = "sharding_user"
39+
password = "sharding_user"
40+
41+
42+
#
43+
# Shards in the cluster
44+
[shards]
45+
46+
# Shard 0
47+
[shards.0]
48+
49+
# [ host, port, role ]
50+
servers = [
51+
[ "127.0.0.1", 5432, "primary" ],
52+
[ "localhost", 5433, "replica" ],
53+
# [ "127.0.1.1", 5432, "replica" ],
54+
]
55+
# Database name (e.g. "postgres")
56+
database = "shard0"
57+
58+
[shards.1]
59+
# [ host, port, role ]
60+
servers = [
61+
[ "127.0.0.1", 5432, "primary" ],
62+
[ "localhost", 5433, "replica" ],
63+
# [ "127.0.1.1", 5432, "replica" ],
64+
]
65+
database = "shard1"
66+
67+
[shards.2]
68+
# [ host, port, role ]
69+
servers = [
70+
[ "127.0.0.1", 5432, "primary" ],
71+
[ "localhost", 5433, "replica" ],
72+
# [ "127.0.1.1", 5432, "replica" ],
73+
]
74+
database = "shard2"
75+
76+
77+
# Settings for our query routing layer.
78+
[query_router]
79+
80+
# If the client doesn't specify, route traffic to
81+
# this role by default.
82+
#
83+
# any: round-robin between primary and replicas,
84+
# replica: round-robin between replicas only without touching the primary,
85+
# primary: all queries go to the primary unless otherwise specified.
86+
default_role = "any"
87+
88+
89+
# Query parser. If enabled, we'll attempt to parse
90+
# every incoming query to determine if it's a read or a write.
91+
# If it's a read query, we'll direct it to a replica. Otherwise, if it's a write,
92+
# we'll direct it to the primary.
93+
query_parser_enabled = false
94+
95+
# If the query parser is enabled and this setting is enabled, the primary will be part of the pool of databases used for
96+
# load balancing of read queries. Otherwise, the primary will only be used for write
97+
# queries. The primary can always be explicitly selected with our custom protocol.
98+
primary_reads_enabled = true
99+
100+
# So what if you wanted to implement a different hashing function,
101+
# or you've already built one and you want this pooler to use it?
102+
#
103+
# Current options:
104+
#
105+
# pg_bigint_hash: PARTITION BY HASH (Postgres hashing function)
106+
# sha1: A hashing function based on SHA1
107+
#
108+
sharding_function = "pg_bigint_hash"

.circleci/run_tests.sh

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,34 @@
33
set -e
44
set -o xtrace
55

6+
# Start PgCat with a particular log level
7+
# for inspection.
8+
function start_pgcat() {
9+
kill -s SIGINT $(pgrep pgcat) || true
10+
RUST_LOG=${1} ./target/debug/pgcat .circleci/pgcat.toml &
11+
sleep 1
12+
}
13+
14+
# Setup the database with shards and user
615
psql -e -h 127.0.0.1 -p 5432 -U postgres -f tests/sharding/query_routing_setup.sql
716

8-
./target/debug/pgcat &
17+
# Install Toxiproxy to simulate a downed/slow database
18+
wget -O toxiproxy-2.1.4.deb https://github.com/Shopify/toxiproxy/releases/download/v2.1.4/toxiproxy_2.1.4_amd64.deb
19+
sudo dpkg -i toxiproxy-2.1.4.deb
920

21+
# Start Toxiproxy
22+
toxiproxy-server &
1023
sleep 1
1124

12-
# Setup PgBench
13-
pgbench -i -h 127.0.0.1 -p 6432
25+
# Create a proxy listening on port 5433 that forwards to Postgres on port 5432
26+
toxiproxy-cli create -l 127.0.0.1:5433 -u 127.0.0.1:5432 postgres_replica
1427

15-
# Run it
16-
pgbench -h 127.0.0.1 -p 6432 -t 500 -c 2 --protocol simple
28+
start_pgcat "info"
1729

18-
# Extended protocol
19-
pgbench -h 127.0.0.1 -p 6432 -t 500 -c 2 --protocol extended
30+
# pgbench test
31+
pgbench -i -h 127.0.0.1 -p 6432 && \
32+
pgbench -h 127.0.0.1 -p 6432 -t 500 -c 2 --protocol simple && \
33+
pgbench -h 127.0.0.1 -p 6432 -t 500 -c 2 --protocol extended
2034

2135
# COPY TO STDOUT test
2236
psql -h 127.0.0.1 -p 6432 -c 'COPY (SELECT * FROM pgbench_accounts LIMIT 15) TO STDOUT;' > /dev/null
@@ -35,18 +49,37 @@ psql -e -h 127.0.0.1 -p 6432 -f tests/sharding/query_routing_test_select.sql > /
3549
psql -e -h 127.0.0.1 -p 6432 -f tests/sharding/query_routing_test_primary_replica.sql > /dev/null
3650

3751
#
38-
# ActiveRecord tests!
52+
# ActiveRecord tests
3953
#
40-
cd tests/ruby
41-
sudo gem install bundler
42-
bundle install
43-
ruby tests.rb
54+
cd tests/ruby && \
55+
sudo gem install bundler && \
56+
bundle install && \
57+
ruby tests.rb && \
58+
cd ../..
59+
60+
# Start PgCat in debug to demonstrate failover better
61+
start_pgcat "debug"
62+
63+
# Add latency to the replica at port 5433 slightly above the healthcheck timeout
64+
toxiproxy-cli toxic add -t latency -a latency=300 postgres_replica
65+
sleep 1
66+
67+
# Note the failover in the logs
68+
timeout 5 psql -e -h 127.0.0.1 -p 6432 <<-EOF
69+
SELECT 1;
70+
SELECT 1;
71+
SELECT 1;
72+
EOF
73+
74+
# Remove latency
75+
toxiproxy-cli toxic remove --toxicName latency_downstream postgres_replica
76+
77+
start_pgcat "info"
4478

45-
cd ../../
4679
# Test session mode (and config reload)
4780
sed -i 's/pool_mode = "transaction"/pool_mode = "session"/' pgcat.toml
4881

49-
# Reload config
82+
# Reload config test
5083
kill -SIGHUP $(pgrep pgcat)
5184

5285
# Prepared statements that will only work in session mode

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ See [sharding README](./tests/sharding/README.md) for sharding logic testing.
7979
| Session pooling | :white_check_mark: | :white_check_mark: | Tested by running pgbench with `--protocol prepared` which only works in session mode. |
8080
| `COPY` | :white_check_mark: | :white_check_mark: | `pgbench -i` uses `COPY`. `COPY FROM` is tested as well. |
8181
| Query cancellation | :white_check_mark: | :white_check_mark: | `psql -c 'SELECT pg_sleep(1000);'` and press `Ctrl-C`. |
82-
| Load balancing | :x: | :white_check_mark: | We could test this by emitting statistics for each replica and compare them. |
83-
| Failover | :x: | :white_check_mark: | Misconfigure a replica in `pgcat.toml` and watch it forward queries to spares. CI testing could include using Toxiproxy. |
82+
| Load balancing | :white_check_mark: | :white_check_mark: | We could test this by emitting statistics for each replica and compare them. |
83+
| Failover | :white_check_mark: | :white_check_mark: | Misconfigure a replica in `pgcat.toml` and watch it forward queries to spares. CI testing is using Toxiproxy. |
8484
| Sharding | :white_check_mark: | :white_check_mark: | See `tests/sharding` and `tests/ruby` for a Rails/ActiveRecord example. |
8585
| Statistics reporting | :x: | :white_check_mark: | Run `nc -l -u 8125` and watch the stats come in every 15 seconds. |
8686
| Live config reloading | :white_check_mark: | :white_check_mark: | Run `kill -s SIGHUP $(pgrep pgcat)` and watch the config reload. |

src/client.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ impl Client {
311311
// or until the client disconnects if we are in session mode.
312312
loop {
313313
let mut message = if message.len() == 0 {
314-
debug!("Waiting for message inside transaction or in session mode");
314+
trace!("Waiting for message inside transaction or in session mode");
315315

316316
match read_message(&mut self.read).await {
317317
Ok(message) => message,

0 commit comments

Comments
 (0)