Skip to content

Commit d5f60b1

Browse files
authored
Allow shard setting with comments (#293)
What: Allows shard selection by the client to come in via comments like `/* shard_id: 1 */ select * from foo;`. Why: We're using a setup in Ruby that makes it tough or impossible to inject commands on the connection to set the shard before it gets to the "real" SQL being run. Instead, we have an updated PG adapter that allows injection of comments before each executed SQL statement. We need this support in pgcat in order to keep some complex shard-picking logic in Ruby code while using pgcat for connection management. Local Testing: Run postgres and pgcat with the default options. Run `psql < tests/sharding/query_routing_setup.sql` to set up the database for the tests, and run `./tests/pgbench/external_shard_test.sh` as often as needed to exercise the shard-setting comment test.
1 parent 9388288 commit d5f60b1

File tree

4 files changed

+159
-8
lines changed

4 files changed

+159
-8
lines changed

pgcat.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ query_parser_enabled = true
8585
# queries. The primary can always be explicitly selected with our custom protocol.
8686
primary_reads_enabled = true
8787

88+
# Allow sharding commands to be passed as statement comments instead of
89+
# separate commands. If these are unset this functionality is disabled.
90+
# sharding_key_regex = '/\* sharding_key: (\d+) \*/'
91+
# shard_id_regex = '/\* shard_id: (\d+) \*/'
92+
# regex_search_limit = 1000 # only look at the first 1000 characters of SQL statements
93+
8894
# So what if you wanted to implement a different hashing function,
8995
# or you've already built one and you want this pooler to use it?
9096
#

src/config.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
use arc_swap::ArcSwap;
33
use log::{error, info};
44
use once_cell::sync::Lazy;
5+
use regex::Regex;
56
use serde_derive::{Deserialize, Serialize};
67
use std::collections::{BTreeMap, HashMap, HashSet};
78
use std::hash::Hash;
@@ -342,8 +343,15 @@ pub struct Pool {
342343
#[serde(default = "Pool::default_automatic_sharding_key")]
343344
pub automatic_sharding_key: Option<String>,
344345

346+
pub sharding_key_regex: Option<String>,
347+
pub shard_id_regex: Option<String>,
348+
pub regex_search_limit: Option<usize>,
349+
345350
pub shards: BTreeMap<String, Shard>,
346351
pub users: BTreeMap<String, User>,
352+
// Note, don't put simple fields below these configs. There's a compatibility issue with TOML that makes it
353+
// incompatible to have simple fields in TOML after complex objects. See
354+
// https://users.rust-lang.org/t/why-toml-to-string-get-error-valueaftertable/85903
347355
}
348356

349357
impl Pool {
@@ -387,6 +395,18 @@ impl Pool {
387395
shard.validate()?;
388396
}
389397

398+
for (option, name) in [
399+
(&self.shard_id_regex, "shard_id_regex"),
400+
(&self.sharding_key_regex, "sharding_key_regex"),
401+
] {
402+
if let Some(regex) = option {
403+
if let Err(parse_err) = Regex::new(regex.as_str()) {
404+
error!("{} is not a valid Regex: {}", name, parse_err);
405+
return Err(Error::BadConfig);
406+
}
407+
}
408+
}
409+
390410
Ok(())
391411
}
392412
}
@@ -405,6 +425,9 @@ impl Default for Pool {
405425
automatic_sharding_key: None,
406426
connect_timeout: None,
407427
idle_timeout: None,
428+
sharding_key_regex: None,
429+
shard_id_regex: None,
430+
regex_search_limit: Some(1000),
408431
}
409432
}
410433
}

src/pool.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use once_cell::sync::Lazy;
88
use parking_lot::{Mutex, RwLock};
99
use rand::seq::SliceRandom;
1010
use rand::thread_rng;
11+
use regex::Regex;
1112
use std::collections::{HashMap, HashSet};
1213
use std::sync::{
1314
atomic::{AtomicBool, Ordering},
@@ -104,6 +105,15 @@ pub struct PoolSettings {
104105

105106
// Ban time
106107
pub ban_time: i64,
108+
109+
// Regex for searching for the sharding key in SQL statements
110+
pub sharding_key_regex: Option<Regex>,
111+
112+
// Regex for searching for the shard id in SQL statements
113+
pub shard_id_regex: Option<Regex>,
114+
115+
// Limit how much of each query is searched for a potential shard regex match
116+
pub regex_search_limit: usize,
107117
}
108118

109119
impl Default for PoolSettings {
@@ -121,6 +131,9 @@ impl Default for PoolSettings {
121131
healthcheck_delay: General::default_healthcheck_delay(),
122132
healthcheck_timeout: General::default_healthcheck_timeout(),
123133
ban_time: General::default_ban_time(),
134+
sharding_key_regex: None,
135+
shard_id_regex: None,
136+
regex_search_limit: 1000,
124137
}
125138
}
126139
}
@@ -300,6 +313,15 @@ impl ConnectionPool {
300313
healthcheck_delay: config.general.healthcheck_delay,
301314
healthcheck_timeout: config.general.healthcheck_timeout,
302315
ban_time: config.general.ban_time,
316+
sharding_key_regex: pool_config
317+
.sharding_key_regex
318+
.clone()
319+
.map(|regex| Regex::new(regex.as_str()).unwrap()),
320+
shard_id_regex: pool_config
321+
.shard_id_regex
322+
.clone()
323+
.map(|regex| Regex::new(regex.as_str()).unwrap()),
324+
regex_search_limit: pool_config.regex_search_limit.unwrap_or(1000),
303325
},
304326
validated: Arc::new(AtomicBool::new(false)),
305327
paused: Arc::new(AtomicBool::new(false)),

src/query_router.rs

Lines changed: 108 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use crate::messages::BytesMutReader;
1414
use crate::pool::PoolSettings;
1515
use crate::sharding::Sharder;
1616

17+
use std::cmp;
1718
use std::collections::BTreeSet;
1819
use std::io::Cursor;
1920

@@ -114,7 +115,52 @@ impl QueryRouter {
114115

115116
let code = message_cursor.get_u8() as char;
116117

117-
// Only simple protocol supported for commands.
118+
// Check for any sharding regex matches in any queries
119+
match code as char {
120+
// For Parse and Query messages peek to see if they specify a shard_id as a comment early in the statement
121+
'P' | 'Q' => {
122+
if self.pool_settings.shard_id_regex.is_some()
123+
|| self.pool_settings.sharding_key_regex.is_some()
124+
{
125+
// Check only the first block of bytes configured by the pool settings
126+
let len = message_cursor.get_i32() as usize;
127+
let seg = cmp::min(len - 5, self.pool_settings.regex_search_limit);
128+
let initial_segment = String::from_utf8_lossy(&message_buffer[0..seg]);
129+
130+
// Check for a shard_id included in the query
131+
if let Some(shard_id_regex) = &self.pool_settings.shard_id_regex {
132+
let shard_id = shard_id_regex.captures(&initial_segment).and_then(|cap| {
133+
cap.get(1).and_then(|id| id.as_str().parse::<usize>().ok())
134+
});
135+
if let Some(shard_id) = shard_id {
136+
debug!("Setting shard to {:?}", shard_id);
137+
self.set_shard(shard_id);
138+
// Skip other command processing since a sharding command was found
139+
return None;
140+
}
141+
}
142+
143+
// Check for a sharding_key included in the query
144+
if let Some(sharding_key_regex) = &self.pool_settings.sharding_key_regex {
145+
let sharding_key =
146+
sharding_key_regex
147+
.captures(&initial_segment)
148+
.and_then(|cap| {
149+
cap.get(1).and_then(|id| id.as_str().parse::<i64>().ok())
150+
});
151+
if let Some(sharding_key) = sharding_key {
152+
debug!("Setting sharding_key to {:?}", sharding_key);
153+
self.set_sharding_key(sharding_key);
154+
// Skip other command processing since a sharding command was found
155+
return None;
156+
}
157+
}
158+
}
159+
}
160+
_ => {}
161+
}
162+
163+
// Only simple protocol supported for commands processed below
118164
if code != 'Q' {
119165
return None;
120166
}
@@ -192,13 +238,11 @@ impl QueryRouter {
192238

193239
match command {
194240
Command::SetShardingKey => {
195-
let sharder = Sharder::new(
196-
self.pool_settings.shards,
197-
self.pool_settings.sharding_function,
198-
);
199-
let shard = sharder.shard(value.parse::<i64>().unwrap());
200-
self.active_shard = Some(shard);
201-
value = shard.to_string();
241+
// TODO: some error handling here
242+
value = self
243+
.set_sharding_key(value.parse::<i64>().unwrap())
244+
.unwrap()
245+
.to_string();
202246
}
203247

204248
Command::SetShard => {
@@ -465,6 +509,16 @@ impl QueryRouter {
465509
}
466510
}
467511

512+
fn set_sharding_key(&mut self, sharding_key: i64) -> Option<usize> {
513+
let sharder = Sharder::new(
514+
self.pool_settings.shards,
515+
self.pool_settings.sharding_function,
516+
);
517+
let shard = sharder.shard(sharding_key);
518+
self.set_shard(shard);
519+
self.active_shard
520+
}
521+
468522
/// Get the current desired server role we should be talking to.
469523
pub fn role(&self) -> Option<Role> {
470524
self.active_role
@@ -775,6 +829,9 @@ mod test {
775829
healthcheck_delay: PoolSettings::default().healthcheck_delay,
776830
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
777831
ban_time: PoolSettings::default().ban_time,
832+
sharding_key_regex: None,
833+
shard_id_regex: None,
834+
regex_search_limit: 1000,
778835
};
779836
let mut qr = QueryRouter::new();
780837
assert_eq!(qr.active_role, None);
@@ -820,4 +877,47 @@ mod test {
820877
)));
821878
assert_eq!(qr.role(), Role::Primary);
822879
}
880+
881+
#[test]
882+
fn test_regex_shard_parsing() {
883+
QueryRouter::setup();
884+
885+
let pool_settings = PoolSettings {
886+
pool_mode: PoolMode::Transaction,
887+
load_balancing_mode: crate::config::LoadBalancingMode::Random,
888+
shards: 5,
889+
user: crate::config::User::default(),
890+
default_role: Some(Role::Replica),
891+
query_parser_enabled: true,
892+
primary_reads_enabled: false,
893+
sharding_function: ShardingFunction::PgBigintHash,
894+
automatic_sharding_key: Some(String::from("id")),
895+
healthcheck_delay: PoolSettings::default().healthcheck_delay,
896+
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
897+
ban_time: PoolSettings::default().ban_time,
898+
sharding_key_regex: Some(Regex::new(r"/\* sharding_key: (\d+) \*/").unwrap()),
899+
shard_id_regex: Some(Regex::new(r"/\* shard_id: (\d+) \*/").unwrap()),
900+
regex_search_limit: 1000,
901+
};
902+
let mut qr = QueryRouter::new();
903+
qr.update_pool_settings(pool_settings.clone());
904+
905+
// Shard should start out unset
906+
assert_eq!(qr.active_shard, None);
907+
908+
// Make sure setting it works
909+
let q1 = simple_query("/* shard_id: 1 */ select 1 from foo;");
910+
assert!(qr.try_execute_command(&q1) == None);
911+
assert_eq!(qr.active_shard, Some(1));
912+
913+
// And make sure changing it works
914+
let q2 = simple_query("/* shard_id: 0 */ select 1 from foo;");
915+
assert!(qr.try_execute_command(&q2) == None);
916+
assert_eq!(qr.active_shard, Some(0));
917+
918+
// Validate setting the shard via a sharding key, with the expected shard copied from sharding.rs tests
919+
let q2 = simple_query("/* sharding_key: 6 */ select 1 from foo;");
920+
assert!(qr.try_execute_command(&q2) == None);
921+
assert_eq!(qr.active_shard, Some(2));
922+
}
823923
}

0 commit comments

Comments
 (0)