|
1 | 1 | import { strict as assert } from "assert";
|
2 |
| -import { Page, extractFrontMatter } from "mongodb-rag-core"; |
3 | 2 | import {
|
4 | 3 | DataSource,
|
5 |
| - makeGitDataSource, |
6 | 4 | MakeMdOnGithubDataSourceParams,
|
7 | 5 | makeMdOnGithubDataSource,
|
8 |
| - removeMarkdownImagesAndLinks, |
9 | 6 | } from "mongodb-rag-core/dataSources";
|
10 | 7 | import { prismaSourceConstructor } from "./prisma";
|
11 | 8 | import { wiredTigerSourceConstructor } from "./wiredTiger";
|
@@ -142,58 +139,31 @@ const mongoDbUniMetadataSource = async () => {
|
142 | 139 | );
|
143 | 140 | };
|
144 | 141 |
|
145 |
| -export const terraformProviderSourceConstructor = async () => { |
146 |
| - const siteBaseUrl = |
147 |
| - "https://registry.terraform.io/providers/mongodb/mongodbatlas/latest/docs"; |
148 |
| - return await makeGitDataSource<SourceTypeName>({ |
| 142 | +export const terraformProviderSourceConfig: MakeMdOnGithubDataSourceParams<SourceTypeName> = |
| 143 | + { |
149 | 144 | name: "atlas-terraform-provider",
|
150 |
| - repoUri: "https://github.com/mongodb/terraform-provider-mongodbatlas.git", |
151 |
| - repoOptions: { |
152 |
| - "--depth": 1, |
153 |
| - "--branch": "master", |
| 145 | + repoUrl: "https://github.com/mongodb/terraform-provider-mongodbatlas.git", |
| 146 | + repoLoaderOptions: { |
| 147 | + branch: "master", |
| 148 | + }, |
| 149 | + pathToPageUrl(pathInRepo, _) { |
| 150 | + const siteBaseUrl = |
| 151 | + "https://registry.terraform.io/providers/mongodb/mongodbatlas/latest/docs"; |
| 152 | + return siteBaseUrl + pathInRepo.replace("docs/", "").replace(".md", ""); |
154 | 153 | },
|
| 154 | + filter: (path: string) => path.includes("docs") && path.endsWith(".md"), |
155 | 155 | sourceType: "tech-docs-external",
|
156 | 156 | metadata: {
|
157 | 157 | productName: "mongodbatlas Terraform Provider",
|
158 | 158 | tags: ["docs", "terraform", "atlas", "hcl"],
|
159 | 159 | },
|
160 |
| - filter: (path: string) => |
161 |
| - path.includes("website/docs") && path.endsWith(".markdown"), |
162 |
| - handlePage: async function (path, content) { |
163 |
| - const { metadata, body } = extractFrontMatter<{ page_title: string }>( |
164 |
| - content |
165 |
| - ); |
166 |
| - const url = getTerraformPageUrl(siteBaseUrl, path); |
167 |
| - |
168 |
| - const page: Omit<Page<SourceTypeName>, "sourceName"> = { |
169 |
| - body: removeMarkdownImagesAndLinks(body), |
170 |
| - format: "md", |
171 |
| - url: url, |
172 |
| - title: metadata?.page_title, |
173 |
| - }; |
174 |
| - return page; |
175 |
| - }, |
176 |
| - }); |
| 160 | + }; |
| 161 | +const terraformProviderDataSource = async () => { |
| 162 | + return await makeMdOnGithubDataSource<SourceTypeName>( |
| 163 | + terraformProviderSourceConfig |
| 164 | + ); |
177 | 165 | };
|
178 | 166 |
|
179 |
| -function getTerraformPageUrl(siteBaseUrl: string, path: string) { |
180 |
| - if (path.includes("website/docs/d/")) { |
181 |
| - return ( |
182 |
| - siteBaseUrl + |
183 |
| - path.replace("website/docs/d", "data-sources").replace(".markdown", "") |
184 |
| - ); |
185 |
| - } else if (path.includes("website/docs/r/")) { |
186 |
| - return ( |
187 |
| - siteBaseUrl + |
188 |
| - path.replace("website/docs/r", "resources").replace(".markdown", "") |
189 |
| - ); |
190 |
| - } else { |
191 |
| - return ( |
192 |
| - siteBaseUrl + path.replace("website/docs/", "").replace(".markdown", "") |
193 |
| - ); |
194 |
| - } |
195 |
| -} |
196 |
| - |
197 | 167 | const webDataSourceConstructor = async (): Promise<DataSource[]> => {
|
198 | 168 | const sitemapUrls = await getUrlsFromSitemap(
|
199 | 169 | "https://www.mongodb.com/sitemap-pages.xml"
|
@@ -229,6 +199,6 @@ export const sourceConstructors: SourceConstructor[] = [
|
229 | 199 | mongoDbCorpDataSource,
|
230 | 200 | mongoDbUniMetadataSource,
|
231 | 201 | practicalAggregationsDataSource,
|
232 |
| - terraformProviderSourceConstructor, |
| 202 | + terraformProviderDataSource, |
233 | 203 | wiredTigerSourceConstructor,
|
234 | 204 | ];
|
0 commit comments