Skip to content

Commit a6ddc86

Browse files
committed
feat(valid-document): add options that result in valid documents
- Add option to remove invalid whitespace text elements
- Add option to handle invalid inline nodes at top level
- Add option to handle invalid text nodes at top level
1 parent e68afaf commit a6ddc86

File tree

7 files changed: 289 additions and 164 deletions

src/htmlStringToDocument.ts

Lines changed: 17 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,24 @@ import {
55
convertTextNodeToText,
66
convertTagToChildren,
77
} from "./converters";
8-
import { parseHtml } from "./parseHtml";
8+
import { parseHtml, ParserOptions } from "./parseHtml";
99
import {
10-
TopLevelBlock,
1110
Document,
1211
Mark,
1312
Text,
1413
Inline,
1514
Block,
16-
BLOCKS,
1715
} from "@contentful/rich-text-types";
1816
import type {
19-
HTMLElementNode,
2017
HTMLNode,
2118
HTMLTagName,
22-
HTMLTextNode,
2319
Next,
2420
Options,
2521
OptionsWithDefaults,
2622
TagConverter,
27-
TextConverter,
2823
} from "./types";
29-
import { createDocumentNode, getAsList, isWhiteSpace } from "./utils";
24+
import { createDocumentNode, getAsList, isNotNull } from "./utils";
25+
import { processConvertedNodesFromTopLevel } from "./processConvertedNodesFromTopLevel";
3026

3127
const DEFAULT_TAG_CONVERTERS: Partial<
3228
Record<HTMLTagName, TagConverter<Block | Inline | Text>>
@@ -61,7 +57,6 @@ const mapHtmlNodeToRichTextNode = (
6157
node: HTMLNode,
6258
marks: Mark[],
6359
options: OptionsWithDefaults,
64-
isTopLevel = false,
6560
) => {
6661
const { convertText, convertTag } = options;
6762

@@ -96,34 +91,22 @@ export const htmlStringToDocument = (
9691
...options.convertTag,
9792
},
9893
convertText: options.convertText ?? convertTextNodeToText,
99-
wrapTopLevelTextNodesInParagraphs: false,
100-
ignoreWhiteSpace: false,
101-
isWhiteSpace: options.isWhiteSpace ?? isWhiteSpace,
94+
handleTopLevelInlines: options.handleTopLevelInlines ?? "preserve",
95+
handleTopLevelText: options.handleTopLevelText ?? "preserve",
96+
ignoreWhiteSpace: options.ignoreWhiteSpace ?? false,
10297
};
103-
const parsedHtml = parseHtml(htmlString);
104-
const richTextNodes = parsedHtml.flatMap((node) =>
105-
mapHtmlNodeToRichTextNode(node, [], optionsWithDefaults, true),
106-
);
10798

108-
const richTextNodesWithTopLevelTextNodesConverted: TopLevelBlock[] =
109-
richTextNodes.map((node) => {
110-
if (node.nodeType === "text") {
111-
return {
112-
data: {},
113-
nodeType: BLOCKS.PARAGRAPH,
114-
content: [
115-
{
116-
...node,
117-
nodeType: "text",
118-
},
119-
],
120-
};
121-
}
99+
const parserOptions: ParserOptions = {
100+
ignoreWhiteSpace: optionsWithDefaults.ignoreWhiteSpace,
101+
};
122102

123-
//TODO: Remove this type assertion.
124-
// Other possible non-top level blocks are: LIST_ITEM, TABLE_ROW, TABLE_CELL and TABLE_HEADER_CELL
125-
return node as TopLevelBlock;
126-
});
103+
const parsedHtml = parseHtml(htmlString, parserOptions);
104+
const richTextNodes = parsedHtml.flatMap((node) =>
105+
mapHtmlNodeToRichTextNode(node, [], optionsWithDefaults),
106+
);
107+
const processedRichTextNodes = richTextNodes
108+
.map((node) => processConvertedNodesFromTopLevel(node, optionsWithDefaults))
109+
.filter(isNotNull);
127110

128-
return createDocumentNode(richTextNodesWithTopLevelTextNodesConverted);
111+
return createDocumentNode(processedRichTextNodes);
129112
};

src/parseHtml.ts

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ import { isNotNull, isWhiteSpace } from "./utils";
1010

1111
import type { HTMLNode, HTMLTagName } from "./types";
1212

13+
export interface ParserOptions {
14+
ignoreWhiteSpace: boolean;
15+
}
16+
1317
const isChildNodeComment = (childNode: ChildNode): childNode is CommentNode => {
1418
return childNode.nodeName === "#comment";
1519
};
@@ -30,9 +34,12 @@ const isChildNodeDocumentType = (
3034

3135
const isTextNodePureWhiteSpace = (textNode: TextNode): boolean => {
3236
return isWhiteSpace(textNode.value);
33-
}
37+
};
3438

35-
const mapChildNodeToHtmlNode = (childNode: ChildNode): HTMLNode | null => {
39+
const mapChildNodeToHtmlNode = (
40+
childNode: ChildNode,
41+
options: ParserOptions,
42+
): HTMLNode | null => {
3643
if (
3744
isChildNodeComment(childNode) ||
3845
isChildNodeDocumentType(childNode) ||
@@ -41,7 +48,7 @@ const mapChildNodeToHtmlNode = (childNode: ChildNode): HTMLNode | null => {
4148
return null;
4249
}
4350
if (isChildNodeTextNode(childNode)) {
44-
if (isTextNodePureWhiteSpace(childNode)) {
51+
if (options.ignoreWhiteSpace && isTextNodePureWhiteSpace(childNode)) {
4552
return null;
4653
}
4754
return {
@@ -54,17 +61,20 @@ const mapChildNodeToHtmlNode = (childNode: ChildNode): HTMLNode | null => {
5461
type: "element",
5562
tagName: childNode.tagName as HTMLTagName,
5663
children: childNode.childNodes
57-
.map((c) => mapChildNodeToHtmlNode(c))
64+
.map((c) => mapChildNodeToHtmlNode(c, options))
5865
.filter(isNotNull),
5966
attrs: Object.fromEntries(
6067
childNode.attrs.map((attr) => [attr.name, attr.value]),
6168
),
6269
};
6370
};
6471

65-
export const parseHtml = (htmlString: string): HTMLNode[] => {
72+
export const parseHtml = (
73+
htmlString: string,
74+
options: ParserOptions,
75+
): HTMLNode[] => {
6676
const parsedHtml = parseFragment(htmlString);
6777
return parsedHtml.childNodes
68-
.map((node) => mapChildNodeToHtmlNode(node))
78+
.map((node) => mapChildNodeToHtmlNode(node, options))
6979
.filter(isNotNull);
7080
};

src/processConvertedNodesFromTopLevel.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import {
2+
Block,
3+
BLOCKS,
4+
Inline,
5+
Text,
6+
TopLevelBlock,
7+
} from "@contentful/rich-text-types";
8+
import type { OptionsWithDefaults } from "./types";
9+
import {
10+
isNodeTypeBlock,
11+
isNodeTypeInline,
12+
isNodeTypeText,
13+
isNodeTypeTopLevelBlock,
14+
} from "./utils";
15+
16+
export const processConvertedNodesFromTopLevel = (
17+
node: Block | Inline | Text,
18+
options: OptionsWithDefaults,
19+
): TopLevelBlock | null => {
20+
if (isNodeTypeBlock(node)) {
21+
if (isNodeTypeTopLevelBlock(node)) {
22+
return node;
23+
}
24+
// Block types that can not be at the top level are: BLOCKS.DOCUMENT | BLOCKS.LIST_ITEM | BLOCKS.TABLE_ROW | BLOCKS.TABLE_CELL | BLOCKS.TABLE_HEADER_CELL
25+
if (node.nodeType === BLOCKS.DOCUMENT) {
26+
return null;
27+
}
28+
// TODO: Handle top level list items and table elements
29+
return node as unknown as TopLevelBlock;
30+
}
31+
if (isNodeTypeInline(node)) {
32+
if (options.handleTopLevelInlines === "remove") {
33+
return null;
34+
}
35+
if (options.handleTopLevelInlines === "wrap-paragraph") {
36+
return {
37+
nodeType: BLOCKS.PARAGRAPH,
38+
data: {},
39+
content: [node],
40+
};
41+
}
42+
return node as unknown as TopLevelBlock;
43+
}
44+
if (isNodeTypeText(node)) {
45+
if (options.handleTopLevelText === "remove") {
46+
return null;
47+
}
48+
if (options.handleTopLevelText === "wrap-paragraph") {
49+
return {
50+
nodeType: BLOCKS.PARAGRAPH,
51+
data: {},
52+
content: [node],
53+
};
54+
}
55+
return node as unknown as TopLevelBlock;
56+
}
57+
return null;
58+
};

src/test/helpers.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@ import {
22
Block,
33
BLOCKS,
44
Inline,
5+
INLINES,
56
Mark,
67
Text,
7-
TopLevelBlock,
8-
TopLevelBlockEnum,
98
} from "@contentful/rich-text-types";
109
import { getAsList } from "../utils";
1110

@@ -28,3 +27,15 @@ export const createBlock = (
2827
data: {},
2928
};
3029
};
30+
31+
export const createInline = (
32+
nodeType: INLINES,
33+
content: Text | Inline | Array<Text | Inline>,
34+
data: { [key: string]: string } = {},
35+
): Inline => {
36+
return {
37+
nodeType,
38+
content: getAsList(content),
39+
data,
40+
};
41+
};

0 commit comments

Comments
 (0)