Skip to content

Commit 13d8d97

Browse files
committed
fix(docs): add documentation for parserOptions and postProcessing
1 parent a6ddc86 commit 13d8d97

File tree

5 files changed

+119
-21
lines changed

5 files changed

+119
-21
lines changed

README.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,67 @@ htmlStringToDocument(htmlString, options);
270270
// ],
271271
// };
272272
```
273+
274+
## Invalid Rich Text Documents
275+
276+
The Contentful Rich Text format requires the `Document` to adhere to a specific format.
277+
The full ruleset can be found in the [Contentful Documentation](https://www.contentful.com/developers/docs/concepts/rich-text/#rules-of-rich-text).
278+
279+
By default, this library converts the HTML node by node to create a rich text document. This means that the result can be an invalid document.
280+
281+
Uploading an invalid document to Contentful will result in an error. The `@contentful/rich-text-types` package from Contentful includes a `validateRichTextDocument` function as of version `17.0.0`.
282+
283+
**To mitigate invalid documents you have a few options:**
284+
285+
- Use the built-in `parserOptions` and/or `postProcessing` options (currently useful for removing whitespace and fixing top level nodes).
286+
- Add a custom `TagConverter` or `TextConverter` that handles your case (for example, wrong child elements of `Inline` nodes, list elements, or tables).
287+
- Change your HTML to a valid format before converting it.
288+
289+
### Handling invalid top level nodes
290+
291+
Some elements cannot be at the top level of a `Document`. This includes `Text`-nodes, `Inline`-nodes, `li`-elements, and any child element of `table` (like a `tr` or `td`).
292+
293+
To handle cases where this occurs, this library includes a few utilities that process the document after it has been created.
294+
295+
These options are:
296+
297+
- `options.postProcessing.handleTopLevelText: "preserve" | "remove" | "wrap-paragraph"`. Default: `"preserve"`.
298+
- `options.postProcessing.handleTopLevelInlines: "preserve" | "remove" | "wrap-paragraph"`. Default: `"preserve"`.
299+
300+
Examples of usage:
301+
302+
```typescript
303+
const htmlNodes = htmlStringToDocument(html, {
304+
postProcessing: {
305+
handleTopLevelText: "wrap-paragraph",
306+
handleTopLevelInlines: "remove",
307+
},
308+
});
309+
```
310+
311+
How it works:
312+
313+
- `"preserve"`: Keep top level nodes as they are, even if it results in an invalid `Document`.
314+
- `"remove"`: Remove the node with all its child nodes from the document.
315+
- `"wrap-paragraph"`: Wrap the node in a simple `paragraph`-node to make it valid.
316+
317+
### Handling extra whitespace nodes
318+
319+
A formatted HTML string might include whitespace that will be parsed and added to the document output. This can result in unwanted text nodes or an invalid document.
320+
321+
Whitespace can be removed by using the `handleWhitespaceNodes` option.
322+
323+
- `options.parserOptions.handleWhitespaceNodes: "preserve" | "remove"`. Default: `"preserve"`.
324+
325+
```typescript
326+
const htmlNodes = htmlStringToDocument(html, {
327+
parserOptions: {
328+
handleWhitespaceNodes: "remove",
329+
},
330+
});
331+
```
332+
333+
How it works:
334+
335+
- `"preserve"`: Keep all whitespace text nodes as they are in the original HTML string.
336+
- `"remove"`: Remove any text node that consists purely of whitespace from the HTML node tree. Uses the following regex: `/^\s*$/`.

src/htmlStringToDocument.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,21 @@ export const htmlStringToDocument = (
9191
...options.convertTag,
9292
},
9393
convertText: options.convertText ?? convertTextNodeToText,
94-
handleTopLevelInlines: options.handleTopLevelInlines ?? "preserve",
95-
handleTopLevelText: options.handleTopLevelText ?? "preserve",
96-
ignoreWhiteSpace: options.ignoreWhiteSpace ?? false,
94+
parserOptions: {
95+
handleWhitespaceNodes:
96+
options?.parserOptions?.handleWhitespaceNodes ?? "preserve",
97+
},
98+
postProcessing: {
99+
handleTopLevelInlines:
100+
options?.postProcessing?.handleTopLevelInlines ?? "preserve",
101+
handleTopLevelText:
102+
options?.postProcessing?.handleTopLevelText ?? "preserve",
103+
},
97104
};
98105

99106
const parserOptions: ParserOptions = {
100-
ignoreWhiteSpace: optionsWithDefaults.ignoreWhiteSpace,
107+
ignoreWhiteSpace:
108+
optionsWithDefaults.parserOptions.handleWhitespaceNodes == "remove",
101109
};
102110

103111
const parsedHtml = parseHtml(htmlString, parserOptions);

src/processConvertedNodesFromTopLevel.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ export const processConvertedNodesFromTopLevel = (
2929
return node as unknown as TopLevelBlock;
3030
}
3131
if (isNodeTypeInline(node)) {
32-
if (options.handleTopLevelInlines === "remove") {
32+
if (options.postProcessing.handleTopLevelInlines === "remove") {
3333
return null;
3434
}
35-
if (options.handleTopLevelInlines === "wrap-paragraph") {
35+
if (options.postProcessing.handleTopLevelInlines === "wrap-paragraph") {
3636
return {
3737
nodeType: BLOCKS.PARAGRAPH,
3838
data: {},
@@ -42,10 +42,10 @@ export const processConvertedNodesFromTopLevel = (
4242
return node as unknown as TopLevelBlock;
4343
}
4444
if (isNodeTypeText(node)) {
45-
if (options.handleTopLevelText === "remove") {
45+
if (options.postProcessing.handleTopLevelText === "remove") {
4646
return null;
4747
}
48-
if (options.handleTopLevelText === "wrap-paragraph") {
48+
if (options.postProcessing.handleTopLevelText === "wrap-paragraph") {
4949
return {
5050
nodeType: BLOCKS.PARAGRAPH,
5151
data: {},

src/test/index.test.ts

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ describe("Processing top level inline nodes to valid formats", () => {
108108
it("Removes an invalid top level inline node", () => {
109109
const htmlNodes = htmlStringToDocument(
110110
`<a href="http://example.com">Top level hyperlink</a>`,
111-
{ handleTopLevelInlines: "remove" },
111+
{
112+
postProcessing: { handleTopLevelInlines: "remove" },
113+
},
112114
);
113115
const matchNode = createDocumentNode([] as TopLevelBlock[]);
114116

@@ -119,7 +121,9 @@ describe("Processing top level inline nodes to valid formats", () => {
119121
it("Wraps an invalid top level inline node in a paragraph", () => {
120122
const htmlNodes = htmlStringToDocument(
121123
`<a href="http://example.com">Top level hyperlink</a>`,
122-
{ handleTopLevelInlines: "wrap-paragraph" },
124+
{
125+
postProcessing: { handleTopLevelInlines: "wrap-paragraph" },
126+
},
123127
);
124128
const matchNode = createDocumentNode([
125129
helpers.createBlock(
@@ -156,7 +160,7 @@ describe("Processing top level text nodes to valid formats", () => {
156160
const htmlNodes = htmlStringToDocument(
157161
"<div>Text under top level div</div>",
158162
{
159-
handleTopLevelText: "wrap-paragraph",
163+
postProcessing: { handleTopLevelText: "wrap-paragraph" },
160164
},
161165
);
162166
const matchNode = createDocumentNode([
@@ -173,7 +177,9 @@ describe("Processing top level text nodes to valid formats", () => {
173177
it("Removes an invalid top level text node", () => {
174178
const htmlNodes = htmlStringToDocument(
175179
"<div>Text under top level div</div>",
176-
{ handleTopLevelText: "remove" },
180+
{
181+
postProcessing: { handleTopLevelText: "remove" },
182+
},
177183
);
178184
const matchNode = createDocumentNode([] as TopLevelBlock[]);
179185

@@ -186,7 +192,9 @@ describe("Processing top level text nodes to valid formats", () => {
186192
"Some unwrapped text prefixing a p tag." +
187193
"<p>Paragraph content <span>I am a text node</span></p>" +
188194
"Some unwrapped text suffixing a p tag",
189-
{ handleTopLevelText: "wrap-paragraph" },
195+
{
196+
postProcessing: { handleTopLevelText: "wrap-paragraph" },
197+
},
190198
);
191199

192200
const matchNode = createDocumentNode([
@@ -210,11 +218,13 @@ describe("Processing top level text nodes to valid formats", () => {
210218
});
211219

212220
describe("Parsing options for whitespace", () => {
213-
it("Handles text nodes with only whitespace by ignoring them", () => {
221+
it("Handles text nodes with only whitespace by removing them", () => {
214222
const html = `<h2>Heading on the first line</h2>\n\n<p>Text on the third line.</p>`;
215223

216224
const htmlNodes = htmlStringToDocument(html, {
217-
ignoreWhiteSpace: true,
225+
parserOptions: {
226+
handleWhitespaceNodes: "remove",
227+
},
218228
});
219229

220230
const matchNode = createDocumentNode([
@@ -232,11 +242,13 @@ describe("Parsing options for whitespace", () => {
232242
expect(validateRichTextDocument(htmlNodes).length).toEqual(0);
233243
});
234244

235-
it("Handles text nodes with only whitespace by including them", () => {
245+
it("Handles text nodes with only whitespace by preserving them", () => {
236246
const html = `<h2>Heading on the first line</h2>\n\n<p>Text on the third line.</p>`;
237247

238248
const htmlNodes = htmlStringToDocument(html, {
239-
ignoreWhiteSpace: false,
249+
parserOptions: {
250+
handleWhitespaceNodes: "preserve",
251+
},
240252
});
241253

242254
const matchNode = createDocumentNode([

src/types.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,29 @@ export type TagConverter<
5555

5656
export type ConvertTagOptions = Record<HTMLTagName | string, TagConverter>;
5757

58+
export type HandleWhitespaceNodes = "preserve" | "remove";
5859
export type HandleTopLevelText = "preserve" | "remove" | "wrap-paragraph";
5960
export type HandleTopLevelInlines = "preserve" | "remove" | "wrap-paragraph";
6061

62+
export interface ParserOptions {
63+
handleWhitespaceNodes: HandleWhitespaceNodes;
64+
}
65+
66+
export interface PostProcessingOptions {
67+
handleTopLevelInlines: HandleTopLevelInlines;
68+
handleTopLevelText: HandleTopLevelText;
69+
}
70+
6171
export interface OptionsWithDefaults {
6272
convertTag: ConvertTagOptions;
6373
convertText: TextConverter;
64-
handleTopLevelInlines: HandleTopLevelInlines;
65-
handleTopLevelText: HandleTopLevelText;
66-
ignoreWhiteSpace: boolean;
74+
parserOptions: ParserOptions;
75+
postProcessing: PostProcessingOptions;
6776
}
6877

69-
export type Options = Partial<OptionsWithDefaults>;
78+
export type Options = Partial<
79+
Omit<OptionsWithDefaults, "parserOptions" | "postProcessing"> & {
80+
parserOptions: Partial<ParserOptions>;
81+
postProcessing: Partial<PostProcessingOptions>;
82+
}
83+
>;

0 commit comments

Comments
 (0)