Skip to content

Commit beb34a2

Browse files
author
Orion Gonzalez
committed
feat: support parsing documents
1 parent 25017a4 commit beb34a2

File tree

1 file changed

+21
-2
lines changed

1 file changed

+21
-2
lines changed

src/lib.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ pub struct Builder<'a> {
366366
strip_comments: bool,
367367
id_prefix: Option<&'a str>,
368368
generic_attribute_prefixes: Option<HashSet<&'a str>>,
369+
is_document: bool,
369370
}
370371

371372
impl<'a> Default for Builder<'a> {
@@ -486,6 +487,7 @@ impl<'a> Default for Builder<'a> {
486487
strip_comments: true,
487488
id_prefix: None,
488489
generic_attribute_prefixes: None,
490+
is_document: false,
489491
}
490492
}
491493
}
@@ -1705,6 +1707,16 @@ impl<'a> Builder<'a> {
17051707
}
17061708
}
17071709

1710+
/// Configures the builder to parse its input as a complete HTML document rather than a document fragment (such as a single `<div>`).
1711+
pub fn parse_as_document(&mut self) -> &mut Self {
1712+
self.is_document = true;
1713+
// TODO: expand the list of document-level tags and attributes allowed below
1714+
self.add_tags(["base", "body", "head", "title"])
1715+
.add_tag_attributes("meta", ["name", "content"])
1716+
.add_tag_attributes("html", ["lang"]);
1717+
self
1718+
}
1719+
17081720
/// Sanitizes an HTML fragment in a string according to the configured options.
17091721
///
17101722
/// # Examples
@@ -1725,7 +1737,11 @@ impl<'a> Builder<'a> {
17251737
/// # }
17261738
/// # fn main() { do_main().unwrap() }
17271739
pub fn clean(&self, src: &str) -> Document {
1728-
let parser = Self::make_parser();
1740+
let parser = if self.is_document {
1741+
html::parse_document(RcDom::default(), html::ParseOpts::default())
1742+
} else {
1743+
Self::make_parser()
1744+
};
17291745
let dom = parser.one(src);
17301746
self.clean_dom(dom)
17311747
}
@@ -1788,7 +1804,10 @@ impl<'a> Builder<'a> {
17881804
.is_none());
17891805
}
17901806
for tag_name in &self.clean_content_tags {
1791-
assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
1807+
assert!(
1808+
!self.tags.contains(tag_name),
1809+
"`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time"
1810+
);
17921811
assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
17931812
}
17941813
let body = {

0 commit comments

Comments
 (0)