Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit 28348e7

Browse files
authored
feat/msp: allow enablement of logical replication features for Datastream (#63092)
Adds a new `postgreSQL.logicalReplication` configuration to allow MSP to generate prerequisite setup for integration with Datastream: https://cloud.google.com/datastream/docs/sources-postgresql. Integration with Datastream allows the Data Analytics team to self-serve data enrichment needs for the Telemetry V2 pipeline. Enabling this feature entails downtime (Cloud SQL instance restart), so enabling the logical replication feature at the Cloud SQL level (`cloudsql.logical_decoding`) is gated behind `postgreSQL.logicalReplication: {}`. Setting up the required configuration in Postgres is a bit complicated, requiring 3 Postgres provider instances: 1. The default admin one, authenticated with our admin user 2. New: a workload identity provider, using cyrilgdn/terraform-provider-postgresql#448 / sourcegraph/managed-services-platform-cdktf#11. This is required for creating a publication on selected tables, which requires being owner of said table. Because tables are created by the application using e.g. auto-migrate, the workload identity is always the table owner, so we need to impersonate the IAM user 3. New: a "replication user" which is created with the replication permission. Replication seems to not be a propagated permission so we need a role/user that has replication enabled. A bit more context is scattered here and there in the docstrings. Beyond the Postgres configuration we also introduce some additional resources to enable easy Datastream configuration: 1. Datastream Private Connection, which peers to the service private network 2. Cloud SQL Proxy VM, which only allows connections to `:5432` from the range specified in 1, allowing a connection to the Cloud SQL instance 3. Datastream Connection Profile attached to 1 From there, the data team can click-ops or manage the Datastream Stream and BigQuery destination on their own. 
Closes CORE-165 Closes CORE-212 Sample config: ```yaml resources: postgreSQL: databases: - "primary" logicalReplication: publications: - name: testing database: primary tables: - users ``` ## Test plan sourcegraph/managed-services#1569 ## Changelog - MSP services can now configure `postgreSQL.logicalReplication` to enable Data Analytics team to replicate selected database tables into BigQuery.
1 parent f6fe8df commit 28348e7

File tree

16 files changed

+667
-25
lines changed

16 files changed

+667
-25
lines changed

deps.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5809,8 +5809,8 @@ def go_dependencies():
58095809
name = "com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql",
58105810
build_file_proto_mode = "disable_global",
58115811
importpath = "github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql",
5812-
sum = "h1:Jy9vKM1mtyJYgx/DRSDftIuhL2MFO6esU84uj4deNn4=",
5813-
version = "v0.0.0-20240513203650-e2b1273f1c1a",
5812+
sum = "h1:t0hSCAvffnF3VAlSW3M9eeeubpMF6TieVc52vKLT98o=",
5813+
version = "v0.0.0-20240617210115-f286e77e83e8",
58145814
)
58155815
go_repository(
58165816
name = "com_github_sourcegraph_managed_services_platform_cdktf_gen_random",

dev/managedservicesplatform/internal/resource/cloudsql/cloudsql.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
7777
Value: pointers.Stringf("%d", *config.Spec.MaxConnections),
7878
})
7979
}
80+
if config.Spec.LogicalReplication != nil {
81+
// https://cloud.google.com/sql/docs/postgres/replication/configure-logical-replication#set-up-native-postgresql-logical-replication
82+
databaseFlags = append(databaseFlags, sqldatabaseinstance.SqlDatabaseInstanceSettingsDatabaseFlags{
83+
Name: pointers.Ptr("cloudsql.logical_decoding"),
84+
Value: pointers.Ptr("on"),
85+
})
86+
}
8087

8188
instance := sqldatabaseinstance.NewSqlDatabaseInstance(scope, id.TerraformID("instance"), &sqldatabaseinstance.SqlDatabaseInstanceConfig{
8289
Project: &config.ProjectID,
@@ -148,7 +155,12 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
148155
IpConfiguration: &sqldatabaseinstance.SqlDatabaseInstanceSettingsIpConfiguration{
149156
Ipv4Enabled: pointers.Ptr(true),
150157
PrivateNetwork: config.Network.Id(),
151-
RequireSsl: pointers.Ptr(true),
158+
159+
// https://cloud.google.com/sql/docs/postgres/admin-api/rest/v1beta4/instances#SslMode
160+
RequireSsl: pointers.Ptr(true),
161+
SslMode: pointers.Ptr("TRUSTED_CLIENT_CERTIFICATE_REQUIRED"),
162+
163+
EnablePrivatePathForGoogleCloudServices: pointers.Ptr(true),
152164
},
153165
},
154166

@@ -194,6 +206,7 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
194206
Length: pointers.Float64(32),
195207
Special: pointers.Ptr(false),
196208
})
209+
// sqluser.NewSqlUser has 'cloudsqlsuperuser' by default
197210
adminUser := sqluser.NewSqlUser(scope, id.TerraformID("admin_user"), &sqluser.SqlUserConfig{
198211
Instance: instance.Name(),
199212
Project: &config.ProjectID,
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
2+
3+
go_library(
4+
name = "datastreamconnection",
5+
srcs = ["datastreamconnection.go"],
6+
importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/datastreamconnection",
7+
visibility = ["//dev/managedservicesplatform:__subpackages__"],
8+
deps = [
9+
"//dev/managedservicesplatform/internal/resource/cloudsql",
10+
"//dev/managedservicesplatform/internal/resource/postgresqllogicalreplication",
11+
"//dev/managedservicesplatform/internal/resource/privatenetwork",
12+
"//dev/managedservicesplatform/internal/resource/serviceaccount",
13+
"//dev/managedservicesplatform/internal/resourceid",
14+
"//lib/pointers",
15+
"@com_github_aws_constructs_go_constructs_v10//:constructs",
16+
"@com_github_hashicorp_terraform_cdk_go_cdktf//:cdktf",
17+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//computefirewall",
18+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//computeinstance",
19+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//datastreamconnectionprofile",
20+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//datastreamprivateconnection",
21+
],
22+
)
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
package datastreamconnection
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/aws/constructs-go/constructs/v10"
7+
"github.com/hashicorp/terraform-cdk-go/cdktf"
8+
9+
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/computefirewall"
10+
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/computeinstance"
11+
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/datastreamprivateconnection"
12+
13+
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/datastreamconnectionprofile"
14+
15+
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/cloudsql"
16+
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/postgresqllogicalreplication"
17+
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/privatenetwork"
18+
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/serviceaccount"
19+
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
20+
"github.com/sourcegraph/sourcegraph/lib/pointers"
21+
)
22+
23+
// Config carries the inputs that New needs to provision the Datastream
// connection resources.
type Config struct {
24+
// VPC supplies the network and subnetwork that the proxy VM is attached to,
// the network the firewall rules apply to, and the network Datastream peers
// with.
VPC *privatenetwork.Output
25+
// CloudSQL supplies the instance whose region and connection name are used
// to place and configure the proxy.
CloudSQL *cloudsql.Output
26+
// CloudSQLClientServiceAccount is used for establishing a proxy that can
27+
// connect to the Cloud SQL instance.
28+
CloudSQLClientServiceAccount serviceaccount.Output
29+
30+
// Publications lists the publications to expose; New creates one Datastream
// connection profile per entry.
Publications []postgresqllogicalreplication.PublicationOutput
31+
// PublicationUserGrants are added to the DependsOn of every connection
// profile that New creates.
PublicationUserGrants []cdktf.ITerraformDependable
32+
}
33+
34+
// Output is the result returned by New. It currently has no fields.
type Output struct {
35+
}
36+
37+
// New provisions everything needed for Datastream to connect to Cloud SQL proxy:
38+
//
39+
// Datastream --peering-> Private Network -> Cloud SQL Proxy VM -> Cloud SQL
40+
//
41+
// We need an additional VM proxying connections to Cloud SQL because Datastream
42+
// and Cloud SQL both have their own internal VPCs, and we cannot transitively
43+
// peer them over the private network we manage.
//
// Resources created, in order:
//
//   - a Compute Engine VM running the Cloud SQL proxy container
//   - a Datastream private connection peered to config.VPC
//   - a firewall rule allowing tcp:5432 from the private connection's subnet
//   - a firewall rule allowing tcp:22 from the IAP range, for debugging
//   - one Datastream connection profile per entry in config.Publications
//
// The returned error is always nil in the current implementation.
44+
func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output, error) {
45+
const proxyInstanceName = "msp-datastream-cloudsqlproxy"
46+
47+
cloudsqlproxyInstance := computeinstance.NewComputeInstance(scope, id.TerraformID("cloudsqlproxy"), &computeinstance.ComputeInstanceConfig{
48+
Name: pointers.Ptr(proxyInstanceName),
49+
Description: pointers.Ptr("Cloud SQL proxy to allow Datastream to connect to Cloud SQL over private network"),
50+
51+
// Use the "-a" zone of the region the Cloud SQL instance lives in. The
// zone suffix is hard-coded, not chosen at random.
// NOTE(review): some GCP regions (e.g. us-east1, europe-west1) have no
// "-a" zone - confirm the target region has one.
52+
Zone: pointers.Stringf("%s-a", *config.CloudSQL.Instance.Region()),
53+
54+
MachineType: pointers.Ptr("e2-micro"),
55+
NetworkInterface: []computeinstance.ComputeInstanceNetworkInterface{{
56+
Network: config.VPC.Network.Name(),
57+
Subnetwork: config.VPC.Subnetwork.Name(),
58+
}},
59+
ServiceAccount: &computeinstance.ComputeInstanceServiceAccount{
60+
Email: &config.CloudSQLClientServiceAccount.Email,
61+
Scopes: &[]*string{pointers.Ptr("https://www.googleapis.com/auth/cloud-platform")},
62+
},
63+
BootDisk: &computeinstance.ComputeInstanceBootDisk{
64+
InitializeParams: &computeinstance.ComputeInstanceBootDiskInitializeParams{
65+
Image: pointers.Ptr("cos-cloud/cos-stable"),
66+
Size: pointers.Float64(10), // GB
67+
},
68+
},
// The instance name doubles as a network tag so that the firewall rules
// below can target this VM via TargetTags.
69+
Tags: &[]*string{pointers.Ptr(proxyInstanceName)},
70+
71+
// See docstring of newMetadataGCEContainerDeclaration for details about
72+
// the label and metadata.
73+
Labels: &map[string]*string{
74+
"container-vm": pointers.Ptr(proxyInstanceName),
75+
"msp": pointers.Ptr("true"),
76+
},
77+
Metadata: &map[string]*string{
78+
"gce-container-declaration": pointers.Ptr(
79+
newMetadataGCEContainerDeclaration(proxyInstanceName, *config.CloudSQL.Instance.ConnectionName())),
80+
},
81+
})
82+
83+
const dsPrivateConnectionSubnet = "10.126.0.0/29" // any '/29' range
84+
datastreamConnection := datastreamprivateconnection.NewDatastreamPrivateConnection(scope, id.TerraformID("cloudsqlproxy-privateconnection"), &datastreamprivateconnection.DatastreamPrivateConnectionConfig{
85+
DisplayName: pointers.Ptr(proxyInstanceName),
86+
PrivateConnectionId: pointers.Ptr(proxyInstanceName),
87+
Location: config.CloudSQL.Instance.Region(),
88+
VpcPeeringConfig: &datastreamprivateconnection.DatastreamPrivateConnectionVpcPeeringConfig{
89+
Vpc: config.VPC.Network.Id(),
90+
Subnet: pointers.Ptr(dsPrivateConnectionSubnet),
91+
},
92+
Labels: &map[string]*string{"msp": pointers.Ptr("true")},
93+
})
94+
95+
// Allow ingress from Datastream: only tcp:5432 from the private connection's
// subnet, and only to instances carrying the proxy VM's tags.
96+
datastreamIngressFirewall := computefirewall.NewComputeFirewall(scope, id.TerraformID("cloudsqlproxy-firewall-datastream-ingress"), &computefirewall.ComputeFirewallConfig{
97+
Name: pointers.Stringf("%s-datastream-ingress", proxyInstanceName),
98+
Description: pointers.Ptr("Allow incoming connections from a Datastream private connection to the Cloud SQL Proxy VM"),
99+
Network: config.VPC.Network.Name(),
100+
Priority: pointers.Float64(1000),
101+
102+
Direction: pointers.Ptr("INGRESS"),
103+
SourceRanges: &[]*string{
104+
pointers.Ptr(dsPrivateConnectionSubnet),
105+
},
106+
Allow: []computefirewall.ComputeFirewallAllow{{
107+
Protocol: pointers.Ptr("tcp"),
108+
Ports: &[]*string{pointers.Ptr("5432")},
109+
}},
110+
TargetTags: cloudsqlproxyInstance.Tags(),
111+
})
112+
113+
// Allow IAP ingress for debug https://cloud.google.com/iap/docs/using-tcp-forwarding
// The source range below is presumably the IAP TCP forwarding range from the
// linked page - verify against that documentation before changing it.
114+
_ = computefirewall.NewComputeFirewall(scope, id.TerraformID("cloudsqlproxy-firewall-iap-ingress"), &computefirewall.ComputeFirewallConfig{
115+
Name: pointers.Stringf("%s-iap-ingress", proxyInstanceName),
116+
Description: pointers.Ptr("Allow incoming connections from GCP IAP to the Cloud SQL Proxy VM"),
117+
Network: config.VPC.Network.Name(),
118+
Priority: pointers.Float64(1000),
119+
120+
Direction: pointers.Ptr("INGRESS"),
121+
SourceRanges: &[]*string{
122+
pointers.Ptr("35.235.240.0/20"),
123+
},
124+
Allow: []computefirewall.ComputeFirewallAllow{{
125+
Protocol: pointers.Ptr("tcp"),
126+
Ports: &[]*string{pointers.Ptr("22")},
127+
}},
128+
TargetTags: cloudsqlproxyInstance.Tags(),
129+
})
130+
131+
for _, pub := range config.Publications {
// Shadow id with a per-publication group so each connection profile gets
// its own Terraform ID.
132+
id := id.Group(pub.Name)
133+
134+
// The Datastream Connection Profile is what the data team will click-ops
135+
// during their creation of the actual Datastream "Stream".
136+
// https://cloud.google.com/datastream/docs/create-a-stream
137+
//
138+
// This is where we stop managing things directly in MSP.
139+
_ = datastreamconnectionprofile.NewDatastreamConnectionProfile(scope, id.TerraformID("cloudsqlproxy-connectionprofile"), &datastreamconnectionprofile.DatastreamConnectionProfileConfig{
140+
DisplayName: pointers.Stringf("MSP Publication - %s", pub.Name),
141+
ConnectionProfileId: pointers.Stringf("msp-publication-%s", pub.Name),
142+
Labels: &map[string]*string{
143+
"msp": pointers.Ptr("true"),
144+
"database": pointers.Ptr(pub.Database),
145+
"pg_replication_slot": pub.ReplicationSlotName,
146+
"pg_publication": pub.PublicationName,
147+
},
148+
Location: config.CloudSQL.Instance.Region(),
149+
PostgresqlProfile: &datastreamconnectionprofile.DatastreamConnectionProfilePostgresqlProfile{
150+
Hostname: cloudsqlproxyInstance.NetworkInterface().
151+
Get(pointers.Float64(0)).
152+
NetworkIp(), // internal IP of the instance
153+
Port: pointers.Float64(5432),
154+
155+
Database: pointers.Ptr(pub.Database),
156+
Username: pub.User.Name(),
157+
Password: pub.User.Password(),
158+
},
159+
PrivateConnectivity: &datastreamconnectionprofile.DatastreamConnectionProfilePrivateConnectivity{
160+
PrivateConnection: datastreamConnection.Name(),
161+
},
// Depend on the user grants and on the Datastream ingress firewall rule.
// NOTE(review): append may write into the backing array of
// config.PublicationUserGrants if that slice has spare capacity. Every
// iteration appends the same element, so the result is the same either
// way, but cloning the slice first would avoid touching caller-owned
// memory.
162+
DependsOn: pointers.Ptr(append(config.PublicationUserGrants,
163+
datastreamIngressFirewall)),
164+
})
165+
}
166+
167+
return &Output{}, nil
168+
}
169+
170+
// newMetadataGCEContainerDeclaration recreates the metadata value that GCP
171+
// provides when you click-ops a Compute Engine VM that runs a container. GCP
172+
// manages the container lifecycle which is quite nice. Sadly this isn't
173+
// available via an official Terraform API, but we can replicate what GCP does
174+
// and hope they don't change anything.
//
// containerName is substituted as the container's name, and
// cloudSQLConnectionString as the final argument passed to the Cloud SQL proxy
// image. Both values are inserted verbatim, without escaping.
175+
func newMetadataGCEContainerDeclaration(containerName, cloudSQLConnectionString string) string {
176+
// Note the docstring about how this format is not a public API - it's
177+
// generated by GCP, and we include that as well
178+
return fmt.Sprintf(`
179+
spec:
180+
restartPolicy: Always
181+
containers:
182+
- name: %s
183+
image: gcr.io/cloud-sql-connectors/cloud-sql-proxy
184+
args:
185+
- '--auto-iam-authn'
186+
- '--private-ip'
187+
- '--address=0.0.0.0'
188+
- '%s'
189+
190+
# This container declaration format is not public API and may change without notice. Please
191+
# use gcloud command-line tool or Google Cloud Console to run Containers on Google Compute Engine.`,
192+
containerName, cloudSQLConnectionString)
193+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
2+
3+
go_library(
4+
name = "postgresqllogicalreplication",
5+
srcs = ["postgresqllogicalreplication.go"],
6+
importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/postgresqllogicalreplication",
7+
visibility = ["//dev/managedservicesplatform:__subpackages__"],
8+
deps = [
9+
"//dev/managedservicesplatform/internal/resource/cloudsql",
10+
"//dev/managedservicesplatform/internal/resourceid",
11+
"//dev/managedservicesplatform/spec",
12+
"//lib/pointers",
13+
"@com_github_aws_constructs_go_constructs_v10//:constructs",
14+
"@com_github_hashicorp_terraform_cdk_go_cdktf//:cdktf",
15+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//publication",
16+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//replicationslot",
17+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//role",
18+
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_random//password",
19+
],
20+
)

0 commit comments

Comments
 (0)