Skip to content

Commit bd429f4

Browse files
authored
EAI-1054 Fix ingestion of atlas-terraform-provider (#761)
* Ingest from the correct path
* Capture correct title and URL
* Linting
1 parent bc43d82 commit bd429f4

File tree

1 file changed

+17
-47
lines changed
  • packages/ingest-mongodb-public/src/sources

1 file changed

+17
-47
lines changed

packages/ingest-mongodb-public/src/sources/index.ts

Lines changed: 17 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,8 @@
11
import { strict as assert } from "assert";
2-
import { Page, extractFrontMatter } from "mongodb-rag-core";
32
import {
43
DataSource,
5-
makeGitDataSource,
64
MakeMdOnGithubDataSourceParams,
75
makeMdOnGithubDataSource,
8-
removeMarkdownImagesAndLinks,
96
} from "mongodb-rag-core/dataSources";
107
import { prismaSourceConstructor } from "./prisma";
118
import { wiredTigerSourceConstructor } from "./wiredTiger";
@@ -142,58 +139,31 @@ const mongoDbUniMetadataSource = async () => {
142139
);
143140
};
144141

145-
export const terraformProviderSourceConstructor = async () => {
146-
const siteBaseUrl =
147-
"https://registry.terraform.io/providers/mongodb/mongodbatlas/latest/docs";
148-
return await makeGitDataSource<SourceTypeName>({
142+
export const terraformProviderSourceConfig: MakeMdOnGithubDataSourceParams<SourceTypeName> =
143+
{
149144
name: "atlas-terraform-provider",
150-
repoUri: "https://github.com/mongodb/terraform-provider-mongodbatlas.git",
151-
repoOptions: {
152-
"--depth": 1,
153-
"--branch": "master",
145+
repoUrl: "https://github.com/mongodb/terraform-provider-mongodbatlas.git",
146+
repoLoaderOptions: {
147+
branch: "master",
148+
},
149+
pathToPageUrl(pathInRepo, _) {
150+
const siteBaseUrl =
151+
"https://registry.terraform.io/providers/mongodb/mongodbatlas/latest/docs";
152+
return siteBaseUrl + pathInRepo.replace("docs/", "").replace(".md", "");
154153
},
154+
filter: (path: string) => path.includes("docs") && path.endsWith(".md"),
155155
sourceType: "tech-docs-external",
156156
metadata: {
157157
productName: "mongodbatlas Terraform Provider",
158158
tags: ["docs", "terraform", "atlas", "hcl"],
159159
},
160-
filter: (path: string) =>
161-
path.includes("website/docs") && path.endsWith(".markdown"),
162-
handlePage: async function (path, content) {
163-
const { metadata, body } = extractFrontMatter<{ page_title: string }>(
164-
content
165-
);
166-
const url = getTerraformPageUrl(siteBaseUrl, path);
167-
168-
const page: Omit<Page<SourceTypeName>, "sourceName"> = {
169-
body: removeMarkdownImagesAndLinks(body),
170-
format: "md",
171-
url: url,
172-
title: metadata?.page_title,
173-
};
174-
return page;
175-
},
176-
});
160+
};
161+
const terraformProviderDataSource = async () => {
162+
return await makeMdOnGithubDataSource<SourceTypeName>(
163+
terraformProviderSourceConfig
164+
);
177165
};
178166

179-
function getTerraformPageUrl(siteBaseUrl: string, path: string) {
180-
if (path.includes("website/docs/d/")) {
181-
return (
182-
siteBaseUrl +
183-
path.replace("website/docs/d", "data-sources").replace(".markdown", "")
184-
);
185-
} else if (path.includes("website/docs/r/")) {
186-
return (
187-
siteBaseUrl +
188-
path.replace("website/docs/r", "resources").replace(".markdown", "")
189-
);
190-
} else {
191-
return (
192-
siteBaseUrl + path.replace("website/docs/", "").replace(".markdown", "")
193-
);
194-
}
195-
}
196-
197167
const webDataSourceConstructor = async (): Promise<DataSource[]> => {
198168
const sitemapUrls = await getUrlsFromSitemap(
199169
"https://www.mongodb.com/sitemap-pages.xml"
@@ -229,6 +199,6 @@ export const sourceConstructors: SourceConstructor[] = [
229199
mongoDbCorpDataSource,
230200
mongoDbUniMetadataSource,
231201
practicalAggregationsDataSource,
232-
terraformProviderSourceConstructor,
202+
terraformProviderDataSource,
233203
wiredTigerSourceConstructor,
234204
];

0 commit comments

Comments
 (0)