Skip to content

Commit 15193a5

Browse files
authored
Merge pull request #335 from FirebasePrivate/fz/change-tracker-config-cleanup
Clean up FirestoreBigQueryChangeTracker.
2 parents d6e2938 + 704aa48 commit 15193a5

File tree

5 files changed

+81
-124
lines changed

5 files changed

+81
-124
lines changed

firestore-bigquery-export/firestore-bigquery-change-tracker/src/bigquery/index.ts

Lines changed: 78 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -25,136 +25,102 @@ import * as logs from "../logs";
2525
import { BigQuery } from "@google-cloud/bigquery";
2626

2727
export interface FirestoreBigQueryEventHistoryTrackerConfig {
28-
collectionPath: string;
2928
datasetId: string;
30-
initialized: boolean;
31-
suppressWarnings: boolean;
29+
tableId: string;
3230
}
3331

3432
/**
35-
* An interface to BigQuery which handles:
36-
* - Iniitializing the raw changelog table when the first event gets recorded.
37-
* - Initializing the latest view over the raw changelog.
38-
* - Streaming writes into the raw changelog table.
33+
* A FirestoreEventHistoryTracker that exports data to BigQuery.
34+
*
35+
* When the first event is received, it creates necessary BigQuery resources:
36+
* - Dataset: {@link FirestoreBigQueryEventHistoryTrackerConfig#datasetId}.
37+
* - Table: Raw change log table {@link FirestoreBigQueryEventHistoryTracker#rawChangeLogTableName}.
38+
* - View: Latest view {@link FirestoreBigQueryEventHistoryTracker#rawLatestView}.
39+
* If any subsequent data export fails, it will attempt to reinitialize.
3940
*/
4041
export class FirestoreBigQueryEventHistoryTracker implements FirestoreEventHistoryTracker {
4142
bq: bigquery.BigQuery;
42-
tableName: string;
43-
initialized: boolean;
44-
suppressWarnings: boolean;
43+
initialized: boolean = false;
4544

4645
constructor(public config: FirestoreBigQueryEventHistoryTrackerConfig) {
4746
this.bq = new bigquery.BigQuery();
48-
this.initialized = config.initialized;
49-
this.tableName = config.collectionPath.replace(/\//g, "_");
50-
this.suppressWarnings = config.suppressWarnings;
5147
}
5248

5349
async record(events: FirestoreDocumentChangeEvent[]) {
54-
if (!this.config.initialized) {
55-
try {
56-
await this.initialize(this.config.datasetId, this.tableName);
57-
this.initialized = true;
58-
} catch (e) {
59-
logs.bigQueryErrorRecordingDocumentChange(e);
60-
}
61-
}
50+
await this.initialize();
51+
6252
const rows = events.map(event => {
63-
return this.buildDataRow(
64-
// Use the function's event ID to protect against duplicate executions
65-
event.eventId,
66-
event.operation,
67-
event.timestamp,
68-
event.documentName,
69-
event.data);
53+
// This must match firestoreToBQTable().
54+
return {
55+
timestamp: event.timestamp,
56+
event_id: event.eventId,
57+
document_name: event.documentName,
58+
operation: ChangeType[event.operation],
59+
data: JSON.stringify(event.data),
60+
};
7061
});
71-
await this.insertData(this.config.datasetId, this.tableName, rows);
62+
await this.insertData(rows);
7263
}
7364

7465
/**
75-
* Ensure that the defined Firestore schema exists within BigQuery and
76-
* contains the correct information. This is invoked for the first time when
77-
* the first document change event is recorded.
66+
* Inserts rows of data into the BigQuery raw change log table.
7867
*/
79-
async initialize(datasetId: string, tableName: string) {
80-
const rawTable = raw(tableName);
81-
82-
await this.initializeDataset(datasetId);
83-
await this.initializeChangelog(datasetId, rawTable);
84-
await this.initializeLatestView(datasetId, rawTable);
85-
};
86-
87-
buildDataRow(
88-
eventId: string,
89-
changeType: ChangeType,
90-
timestamp: string,
91-
document_name: string,
92-
data?: Object
93-
): bigquery.RowMetadata {
94-
// This must match firestoreToBQTable().
95-
return {
96-
timestamp: timestamp,
97-
event_id: eventId,
98-
document_name: document_name,
99-
operation: ChangeType[changeType],
100-
data: JSON.stringify(data),
101-
};
102-
};
68+
private async insertData(rows: bigquery.RowMetadata[]) {
69+
try {
70+
const dataset = this.bq.dataset(this.config.datasetId);
71+
const table = dataset.table(this.rawChangeLogTableName());
72+
logs.dataInserting(rows.length);
73+
await table.insert(rows);
74+
logs.dataInserted(rows.length);
75+
} catch (e) {
76+
// Reinitializing in case the destination table is modified.
77+
this.initialized = false;
78+
throw e;
79+
}
80+
}
10381

10482
/**
105-
* Insert a row of data into the BigQuery `raw` data table
83+
* Creates the BigQuery resources with the expected schema for {@link FirestoreEventHistoryTracker}.
84+
* After the first invocation, it skips initialization assuming these resources are still there.
10685
*/
107-
async insertData(
108-
datasetId: string,
109-
collectionTableName: string,
110-
rows: bigquery.RowMetadata[]
111-
) {
112-
const name = changeLog(raw(collectionTableName));
113-
const dataset = this.bq.dataset(datasetId);
114-
const table = dataset.table(name);
115-
const rowCount = rows.length;
116-
117-
logs.dataInserting(rowCount);
118-
await table.insert(rows);
119-
logs.dataInserted(rowCount);
120-
};
86+
private async initialize() {
87+
if (this.initialized) {
88+
return;
89+
}
90+
await this.initializeDataset();
91+
await this.initializeRawChangeLogTable();
92+
await this.initializeLatestView();
93+
this.initialized = true;
94+
}
12195

12296
/**
123-
* Check that the specified dataset exists, and create it if it doesn't.
97+
* Creates the specified dataset if it doesn't already exist.
12498
*/
125-
async initializeDataset(datasetId: string): Promise<bigquery.Dataset> {
126-
const dataset = this.bq.dataset(datasetId);
99+
private async initializeDataset() {
100+
const dataset = this.bq.dataset(this.config.datasetId);
127101
const [datasetExists] = await dataset.exists();
128102
if (datasetExists) {
129-
if (!this.suppressWarnings) {
130-
logs.bigQueryDatasetExists(datasetId);
131-
}
103+
logs.bigQueryDatasetExists(this.config.datasetId);
132104
} else {
133-
logs.bigQueryDatasetCreating(datasetId);
105+
logs.bigQueryDatasetCreating(this.config.datasetId);
134106
await dataset.create();
135-
logs.bigQueryDatasetCreated(datasetId);
107+
logs.bigQueryDatasetCreated(this.config.datasetId);
136108
}
137109
return dataset;
138-
};
110+
}
139111

140112
/**
141-
* Check that the table exists within the specified dataset, and create it
142-
* if it doesn't. If the table does exist, validate that the BigQuery schema
143-
* is correct and add any missing fields.
113+
* Creates the raw change log table if it doesn't already exist.
114+
* TODO: Validate that the BigQuery schema is correct if the table does exist.
144115
*/
145-
async initializeChangelog(
146-
datasetId: string,
147-
tableName: string,
148-
): Promise<bigquery.Table> {
149-
const changelogName = changeLog(tableName);
150-
const dataset = this.bq.dataset(datasetId);
151-
let table = dataset.table(changelogName);
116+
private async initializeRawChangeLogTable() {
117+
const changelogName = this.rawChangeLogTableName();
118+
const dataset = this.bq.dataset(this.config.datasetId);
119+
const table = dataset.table(changelogName);
152120
const [tableExists] = await table.exists();
153121

154122
if (tableExists) {
155-
if (!this.suppressWarnings) {
156-
logs.bigQueryTableAlreadyExists(table.id, dataset.id);
157-
}
123+
logs.bigQueryTableAlreadyExists(table.id, dataset.id);
158124
} else {
159125
logs.bigQueryTableCreating(changelogName);
160126
const options = {
@@ -169,37 +135,35 @@ export class FirestoreBigQueryEventHistoryTracker implements FirestoreEventHisto
169135
};
170136

171137
/**
172-
* Create a view over a table storing a change log of Firestore documents
173-
* which contains only latest version of all live documents in the mirrored
174-
* collection.
138+
* Creates the latest snapshot view, which returns only latest operations
139+
* of all existing documents over the raw change log table.
175140
*/
176-
async initializeLatestView(
177-
datasetId: string,
178-
tableName: string
179-
): Promise<bigquery.Table> {
180-
let viewName = latest(tableName);
181-
const dataset = this.bq.dataset(datasetId);
182-
let view = dataset.table(viewName);
141+
private async initializeLatestView() {
142+
const dataset = this.bq.dataset(this.config.datasetId);
143+
const view = dataset.table(this.rawLatestView());
183144
const [viewExists] = await view.exists();
184145

185146
if (viewExists) {
186-
if (!this.suppressWarnings) {
187-
logs.bigQueryViewAlreadyExists(view.id, dataset.id);
188-
}
147+
logs.bigQueryViewAlreadyExists(view.id, dataset.id);
189148
} else {
190-
const latestSnapshot = latestConsistentSnapshotView(datasetId, changeLog(tableName));
191-
logs.bigQueryViewCreating(viewName, latestSnapshot.query);
149+
const latestSnapshot = latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName());
150+
logs.bigQueryViewCreating(this.rawLatestView(), latestSnapshot.query);
192151
const options = {
193-
friendlyName: viewName,
152+
friendlyName: this.rawLatestView(),
194153
view: latestSnapshot,
195154
};
196155
await view.create(options);
197-
logs.bigQueryViewCreated(viewName);
156+
logs.bigQueryViewCreated(this.rawLatestView());
198157
}
199158
return view;
200-
};
159+
}
160+
161+
private rawChangeLogTableName(): string {
162+
return `${this.config.tableId}_raw_changelog`;
163+
}
164+
165+
private rawLatestView(): string {
166+
return `${this.config.tableId}_raw_latest`;
167+
}
201168
}
202169

203-
export function raw(tableName: string): string { return `${tableName}_raw`; };
204-
export function changeLog(tableName: string): string { return `${tableName}_changelog`; }
205-
export function latest(tableName: string): string { return `${tableName}_latest`; };

firestore-bigquery-export/firestore-bigquery-change-tracker/src/bigquery/schema.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import * as bigquery from "@google-cloud/bigquery";
1818
import * as errors from "../errors";
1919
import * as logs from "../logs";
20-
import { changeLog, raw, latest } from "../bigquery";
2120
import * as sqlFormatter from "sql-formatter";
2221

2322
export type BigQueryFieldMode = "NULLABLE" | "REPEATED" | "REQUIRED";

firestore-bigquery-export/functions/lib/index.js

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,8 @@ const firestore_bigquery_change_tracker_1 = require("@firebaseextensions/firesto
3030
const logs = require("./logs");
3131
const util_1 = require("./util");
3232
const eventTracker = new firestore_bigquery_change_tracker_1.FirestoreBigQueryEventHistoryTracker({
33-
collectionPath: config_1.default.tableId,
33+
tableId: config_1.default.tableId,
3434
datasetId: config_1.default.datasetId,
35-
initialized: false,
36-
suppressWarnings: false,
3735
});
3836
logs.init();
3937
exports.fsexportbigquery = functions.handler.firestore.document.onWrite((change, context) => __awaiter(void 0, void 0, void 0, function* () {

firestore-bigquery-export/functions/src/index.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,8 @@ import * as logs from "./logs";
2525
import { getChangeType } from "./util";
2626

2727
const eventTracker: FirestoreEventHistoryTracker = new FirestoreBigQueryEventHistoryTracker({
28-
collectionPath: config.tableId, // TODO: rename collectionPath.
28+
tableId: config.tableId,
2929
datasetId: config.datasetId,
30-
initialized: false, // TODO: Remove this config.
31-
suppressWarnings: false, // TODO: Remove this config.
3230
});
3331

3432
logs.init();

firestore-bigquery-export/scripts/import/src/index.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,8 @@ const run = async (): Promise<number> => {
126126
process.env.GOOGLE_CLOUD_PROJECT = projectId;
127127

128128
const dataSink = new FirestoreBigQueryEventHistoryTracker({
129-
collectionPath: destinationCollectionPath,
129+
tableId: destinationCollectionPath,
130130
datasetId: datasetId,
131-
initialized: false,
132-
suppressWarnings: true,
133131
});
134132

135133
console.log(

0 commit comments

Comments
 (0)