Skip to content

Commit b1fbf78

Browse files
codyde and BYK
authored and committed
feat(ai): Add .md extension to provide pages in markdown for LLMs (#13994)
Adds support for a `.md` suffix at the end of every pre-rendered path. This is done with a hack: those paths are rewritten to `public/md-exports/...`. The contents of this directory are generated _after_ `next build` by scraping all HTML files under `.next/server/app`. Not ideal, but it looks like the easiest path for now. --------- Co-authored-by: Burak Yigit Kaya <byk@sentry.io>
1 parent 1808b3e commit b1fbf78

File tree

11 files changed

+378
-15
lines changed

11 files changed

+378
-15
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ npm-debug.log*
44
yarn-debug.log*
55
yarn-error.log*
66

7+
# Ignore generated export markdown files
8+
/public/md-exports/
9+
710
# Runtime data
811
pids
912
*.pid

next.config.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import {codecovNextJSWebpackPlugin} from '@codecov/nextjs-webpack-plugin';
22
import {withSentryConfig} from '@sentry/nextjs';
33

4-
import {redirects} from './redirects';
4+
import {redirects} from './redirects.js';
55

66
const outputFileTracingExcludes = process.env.NEXT_PUBLIC_DEVELOPER_DOCS
77
? {
@@ -55,6 +55,12 @@ const nextConfig = {
5555
DEVELOPER_DOCS_: process.env.NEXT_PUBLIC_DEVELOPER_DOCS,
5656
},
5757
redirects,
58+
rewrites: async () => [
59+
{
60+
source: '/:path*.md',
61+
destination: '/md-exports/:path*.md',
62+
},
63+
],
5864
sassOptions: {
5965
silenceDeprecations: ['legacy-js-api'],
6066
},

package.json

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
"dev": "yarn enforce-redirects && concurrently \"yarn sidecar\" \"node ./src/hotReloadWatcher.mjs\" \"next dev\"",
1919
"dev:developer-docs": "yarn enforce-redirects && NEXT_PUBLIC_DEVELOPER_DOCS=1 yarn dev",
2020
"build:developer-docs": "yarn enforce-redirects && git submodule init && git submodule update && NEXT_PUBLIC_DEVELOPER_DOCS=1 yarn build",
21-
"build": "yarn enforce-redirects && next build",
21+
"build": "yarn enforce-redirects && next build && yarn generate-md-exports",
22+
"generate-md-exports": "node scripts/generate-md-exports.mjs",
2223
"vercel:build:developer-docs": "yarn enforce-redirects && git submodule init && git submodule update && NEXT_PUBLIC_DEVELOPER_DOCS=1 yarn build",
24+
"start:dev": "NODE_ENV=development yarn build && yarn start",
2325
"start": "next start",
2426
"lint": "next lint",
2527
"lint:ts": "tsc --skipLibCheck",
@@ -61,6 +63,7 @@
6163
"framer-motion": "^10.12.16",
6264
"github-slugger": "^2.0.0",
6365
"gray-matter": "^4.0.3",
66+
"hast-util-select": "^6.0.4",
6467
"hast-util-to-jsx-runtime": "^2.3.2",
6568
"hastscript": "^8.0.0",
6669
"image-size": "^1.2.1",
@@ -85,21 +88,26 @@
8588
"react-select": "^5.7.3",
8689
"react-textarea-autosize": "^8.5.3",
8790
"rehype-autolink-headings": "^7.1.0",
91+
"rehype-parse": "^9.0.1",
8892
"rehype-preset-minify": "^7.0.0",
8993
"rehype-prism-diff": "^1.1.2",
9094
"rehype-prism-plus": "^1.6.3",
95+
"rehype-remark": "^10.0.1",
9196
"rehype-stringify": "^10.0.0",
92-
"remark-gfm": "^4.0.0",
97+
"remark-gfm": "^4.0.1",
9398
"remark-mdx-images": "^3.0.0",
9499
"remark-parse": "^11.0.0",
95100
"remark-prism": "^1.3.6",
101+
"remark-stringify": "^11.0.0",
96102
"rss": "^1.2.2",
97103
"sass": "^1.69.5",
98104
"search-insights": "^2.17.2",
99105
"server-only": "^0.0.1",
100106
"sharp": "^0.33.4",
101107
"tailwindcss-scoped-preflight": "^3.0.4",
102-
"textarea-markdown-editor": "^1.0.4"
108+
"textarea-markdown-editor": "^1.0.4",
109+
"unified": "^11.0.5",
110+
"unist-util-remove": "^4.0.0"
103111
},
104112
"devDependencies": {
105113
"@babel/preset-typescript": "^7.15.0",

scripts/generate-md-exports.mjs

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#!/usr/bin/env node
2+
3+
import {fileURLToPath} from 'url';
4+
5+
import {selectAll} from 'hast-util-select';
6+
import {existsSync} from 'node:fs';
7+
import {mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises';
8+
import {cpus} from 'node:os';
9+
import * as path from 'node:path';
10+
import {isMainThread, parentPort, Worker, workerData} from 'node:worker_threads';
11+
import rehypeParse from 'rehype-parse';
12+
import rehypeRemark from 'rehype-remark';
13+
import remarkGfm from 'remark-gfm';
14+
import remarkStringify from 'remark-stringify';
15+
import {unified} from 'unified';
16+
import {remove} from 'unist-util-remove';
17+
18+
/**
 * Main-thread entry point for the markdown export.
 *
 * Locates the project root (nearest ancestor of the current working directory
 * that contains a `package.json`), recreates `public/md-exports` from scratch,
 * discovers every `.html` file under `.next/server/app`, and distributes the
 * conversions round-robin across worker threads plus the main thread itself.
 *
 * @returns {Promise<void>} Resolves once every discovered file was converted.
 * @throws {Error} If no `package.json` can be found, if a worker errors or
 *   exits with a non-zero code, or if any file fails to convert -- on a worker
 *   or on the main thread.
 */
async function createWork() {
  // Walk up from the cwd until we hit the directory holding package.json
  let root = process.cwd();
  while (!existsSync(path.join(root, 'package.json'))) {
    const parent = path.dirname(root);
    if (parent === root) {
      // Reached the filesystem root without finding a project
      throw new Error('Could not find package.json in parent directories');
    }
    root = parent;
  }
  const INPUT_DIR = path.join(root, '.next', 'server', 'app');
  const OUTPUT_DIR = path.join(root, 'public', 'md-exports');

  console.log(`🚀 Starting markdown generation from: ${INPUT_DIR}`);
  console.log(`📁 Output directory: ${OUTPUT_DIR}`);

  // Clear output directory
  await rm(OUTPUT_DIR, {recursive: true, force: true});
  await mkdir(OUTPUT_DIR, {recursive: true});

  // On a 16-core machine, 8 workers were optimal (and slightly faster than 16)
  const numWorkers = Math.max(Math.floor(cpus().length / 2), 2);
  // One task bucket per worker; the last bucket is handled by the main thread
  const workerTasks = new Array(numWorkers).fill(null).map(() => []);

  console.log(`🔎 Discovering files to convert...`);

  let numFiles = 0;
  let workerIdx = 0;
  // Need a high buffer size here otherwise Node skips some subdirectories!
  // See https://github.com/nodejs/node/issues/48820
  const dir = await opendir(INPUT_DIR, {recursive: true, bufferSize: 1024});
  for await (const dirent of dir) {
    if (dirent.name.endsWith('.html') && dirent.isFile()) {
      // `parentPath` may be missing on the dirent, so fall back to `path`
      const parentDir = dirent.parentPath || dirent.path;
      const sourcePath = path.join(parentDir, dirent.name);
      // Mirror the input directory layout under the output directory
      const targetDir = path.join(OUTPUT_DIR, path.relative(INPUT_DIR, parentDir));
      await mkdir(targetDir, {recursive: true});
      // Swap the trailing `.html` (5 chars) for `.md`
      const targetPath = path.join(targetDir, dirent.name.slice(0, -5) + '.md');
      workerTasks[workerIdx].push({sourcePath, targetPath});
      workerIdx = (workerIdx + 1) % numWorkers;
      numFiles++;
    }
  }

  console.log(`📄 Converting ${numFiles} files with ${numWorkers} workers...`);

  // Workers re-run this very file; they take the non-main-thread branch at the bottom
  const selfPath = fileURLToPath(import.meta.url);
  const workerPromises = new Array(numWorkers - 1).fill(null).map((_, idx) => {
    return new Promise((resolve, reject) => {
      const worker = new Worker(selfPath, {workerData: workerTasks[idx]});
      let hasErrors = false;
      worker.on('message', data => {
        if (data.failedTasks.length === 0) {
          console.log(`✅ Worker[${idx}]: ${data.success} files successfully.`);
        } else {
          hasErrors = true;
          console.error(`❌ Worker[${idx}]: ${data.failedTasks.length} files failed:`);
          console.error(data.failedTasks);
        }
      });
      worker.on('error', reject);
      worker.on('exit', code => {
        if (code !== 0) {
          reject(new Error(`Worker[${idx}] stopped with exit code ${code}`));
        } else if (hasErrors) {
          reject(new Error(`Worker[${idx}] had some errors.`));
        } else {
          resolve();
        }
      });
    });
  });
  // The main thread can also process tasks -- That's 65% more bullet per bullet! -Cave Johnson
  // Its result must be inspected like a worker's message, otherwise conversion
  // failures on the main thread would pass silently and the build would succeed.
  workerPromises.push(
    processTaskList(workerTasks[workerTasks.length - 1]).then(data => {
      if (data.failedTasks.length === 0) {
        console.log(`✅ Main thread: ${data.success} files successfully.`);
        return;
      }
      console.error(`❌ Main thread: ${data.failedTasks.length} files failed:`);
      console.error(data.failedTasks);
      throw new Error('Main thread had some errors.');
    })
  );

  await Promise.all(workerPromises);

  console.log(`📄 Generated ${numFiles} markdown files from HTML.`);
  console.log('✅ Markdown export generation complete!');
}
97+
98+
/**
 * Converts a single pre-rendered HTML page into markdown.
 *
 * @param {string} source Path of the HTML file, read as UTF-8.
 * @param {string} target Path the resulting markdown is written to.
 * @returns {Promise<void>} Resolves after the markdown file has been written.
 */
async function genMDFromHTML(source, target) {
  const html = await readFile(source, {encoding: 'utf8'});

  // Keep only the matching content nodes; the `main div > hgroup` selector is
  // what brings the headers along.
  const pickContent = () => tree => selectAll('main div > hgroup, div#main', tree);

  // The selected nodes are a plain list; without wrapping them in a single
  // element rehypeRemark was observed to produce an empty string.
  const wrapInDiv = () => nodes => ({
    type: 'element',
    tagName: 'div',
    properties: {},
    children: nodes,
  });

  // Empty inline code nodes end up in the tree (probably from some tab logic
  // in the HTML), so strip them.
  const dropEmptyInlineCode = () => tree =>
    remove(tree, {type: 'inlineCode', value: ''});

  const processor = unified()
    .use(rehypeParse)
    .use(pickContent)
    .use(wrapInDiv)
    .use(rehypeRemark, {
      document: false,
      handlers: {
        // Buttons get confusing in markdown, especially since they are used
        // as tab headers, so emit nothing for them.
        button() {},
      },
    })
    .use(dropEmptyInlineCode)
    .use(remarkGfm)
    .use(remarkStringify);

  const markdown = String(await processor.process(html));
  await writeFile(target, markdown);
}
129+
130+
/**
 * Converts a list of tasks one after another, collecting failures instead of
 * aborting on the first one.
 *
 * @param {{sourcePath: string, targetPath: string}[]} tasks Files to convert.
 * @returns {Promise<{success: number, failedTasks: object[]}>} Count of
 *   successful conversions plus one `{sourcePath, targetPath, error}` entry
 *   per failed task.
 */
async function processTaskList(tasks) {
  const failures = [];
  for (const {sourcePath, targetPath} of tasks) {
    // Record the failure and carry on with the remaining tasks
    await genMDFromHTML(sourcePath, targetPath).catch(error => {
      failures.push({sourcePath, targetPath, error});
    });
  }
  const succeeded = tasks.length - failures.length;
  return {success: succeeded, failedTasks: failures};
}
141+
142+
/**
 * Worker-thread entry point: converts the given tasks and posts the summary
 * returned by `processTaskList` to the parent thread.
 *
 * @param {{sourcePath: string, targetPath: string}[]} tasks Files to convert.
 * @returns {Promise<void>}
 */
async function doWork(tasks) {
  const summary = await processTaskList(tasks);
  parentPort.postMessage(summary);
}
145+
146+
// This file serves as both the orchestrator and the worker script: the main
// thread plans and distributes the work, worker threads convert their share.
await (isMainThread ? createWork() : doWork(workerData));

src/components/apiExamples/apiExamples.tsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,11 @@ export function ApiExamples({api}: Props) {
129129
</button>
130130
</div>
131131
<pre className={`${styles['api-block-example']} relative`}>
132-
<div className={codeBlockStyles.copied} style={{opacity: showCopied ? 1 : 0}}>
132+
<div
133+
data-mdast="ignore"
134+
className={codeBlockStyles.copied}
135+
style={{opacity: showCopied ? 1 : 0}}
136+
>
133137
Copied
134138
</div>
135139
{selectedTabView === 0 &&

src/components/breadcrumbs/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ export function Breadcrumbs({leafNode}: BreadcrumbsProps) {
2424
}
2525

2626
return (
27-
<ul className="list-none flex p-0 flex-wrap" style={{margin: 0}}>
27+
<ul className="list-none flex p-0 flex-wrap float-left" style={{margin: 0}}>
2828
{breadcrumbs.map(b => {
2929
return (
3030
<li className={styles['breadcrumb-item']} key={b.to}>

src/components/codeBlock/index.tsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,11 @@ export function CodeBlock({filename, language, children}: CodeBlockProps) {
5555
</button>
5656
)}
5757
</div>
58-
<div className={styles.copied} style={{opacity: showCopied ? 1 : 0}}>
58+
<div
59+
data-mdast="ignore"
60+
className={styles.copied}
61+
style={{opacity: showCopied ? 1 : 0}}
62+
>
5963
Copied
6064
</div>
6165
<div ref={codeRef}>

src/components/docPage/index.tsx

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import {ReactNode} from 'react';
2+
import Link from 'next/link';
23

34
import {getCurrentGuide, getCurrentPlatform, nodeForPath} from 'sentry-docs/docTree';
5+
import Markdown from 'sentry-docs/icons/Markdown';
46
import {serverContext} from 'sentry-docs/serverContext';
57
import {FrontMatter} from 'sentry-docs/types';
68
import {PaginationNavNode} from 'sentry-docs/types/paginationNavNode';
@@ -81,7 +83,17 @@ export function DocPage({
8183
<div className="mb-4">
8284
<Banner />
8385
</div>
84-
{leafNode && <Breadcrumbs leafNode={leafNode} />}
86+
<div className="overflow-hidden">
87+
{leafNode && <Breadcrumbs leafNode={leafNode} />}{' '}
88+
<Link
89+
rel="nofollow"
90+
className="float-right"
91+
href={`/${pathname}.md`}
92+
title="Markdown version of this page"
93+
>
94+
<Markdown className="flex p-0 flex-wrap" width={24} height={24} />
95+
</Link>
96+
</div>
8597
<div>
8698
<hgroup>
8799
<h1>{frontMatter.title}</h1>

src/icons/Markdown.tsx

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
 * Markdown logo rendered as an inline SVG.
 *
 * `width` and `height` default to 16; every other prop is spread onto the
 * `<svg>` element after the built-in attributes, so callers can override them.
 */
function Markdown(props: React.SVGAttributes<SVGElement>) {
  const {width = 16, height = 16, ...svgProps} = props;
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width={width}
      height={height}
      viewBox="0 0 32 32"
      fill="currentColor"
      {...svgProps}
    >
      <path d="M 2.875 6 C 1.320313 6 0 7.253906 0 8.8125 L 0 23.1875 C 0 24.746094 1.320313 26 2.875 26 L 29.125 26 C 30.679688 26 32 24.746094 32 23.1875 L 32 8.8125 C 32 7.253906 30.679688 6 29.125 6 Z M 2.875 8 L 29.125 8 C 29.640625 8 30 8.382813 30 8.8125 L 30 23.1875 C 30 23.617188 29.640625 24 29.125 24 L 2.875 24 C 2.359375 24 2 23.617188 2 23.1875 L 2 8.8125 C 2 8.382813 2.359375 8 2.875 8 Z M 5 11 L 5 21 L 8 21 L 8 14.34375 L 11 18.3125 L 14 14.34375 L 14 21 L 17 21 L 17 11 L 14 11 L 11 15 L 8 11 Z M 22 11 L 22 16 L 19 16 L 23.5 21 L 28 16 L 25 16 L 25 11 Z" />
    </svg>
  );
}
15+
export default Markdown;

src/mdx.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,9 @@ export async function getFileBySlug(slug: string) {
429429
],
430430
},
431431
],
432-
[rehypePrismPlus, {ignoreMissing: true}],
432+
[rehypePrismPlus, {ignoreMissing: true}] as any,
433433
rehypeOnboardingLines,
434-
[rehypePrismDiff, {remove: true}],
434+
[rehypePrismDiff, {remove: true}] as any,
435435
rehypePresetMinify,
436436
];
437437
return options;

0 commit comments

Comments
 (0)