@@ -366,6 +366,7 @@ pub struct Builder<'a> {
366
366
strip_comments : bool ,
367
367
id_prefix : Option < & ' a str > ,
368
368
generic_attribute_prefixes : Option < HashSet < & ' a str > > ,
369
+ is_document : bool ,
369
370
}
370
371
371
372
impl < ' a > Default for Builder < ' a > {
@@ -486,6 +487,7 @@ impl<'a> Default for Builder<'a> {
486
487
strip_comments : true ,
487
488
id_prefix : None ,
488
489
generic_attribute_prefixes : None ,
490
+ is_document : false ,
489
491
}
490
492
}
491
493
}
@@ -1705,6 +1707,16 @@ impl<'a> Builder<'a> {
1705
1707
}
1706
1708
}
1707
1709
1710
+ /// Use this to parse a full document instead of a document fragment (like a div)
1711
+ pub fn parse_as_document ( & mut self ) -> & mut Self {
1712
+ self . is_document = true ;
1713
+ // TODO: expand on this
1714
+ self . add_tags ( [ "base" , "body" , "head" , "title" ] )
1715
+ . add_tag_attributes ( "meta" , [ "name" , "content" ] )
1716
+ . add_tag_attributes ( "html" , [ "lang" ] ) ;
1717
+ self
1718
+ }
1719
+
1708
1720
/// Sanitizes an HTML fragment in a string according to the configured options.
1709
1721
///
1710
1722
/// # Examples
@@ -1725,7 +1737,11 @@ impl<'a> Builder<'a> {
1725
1737
/// # }
1726
1738
/// # fn main() { do_main().unwrap() }
1727
1739
pub fn clean ( & self , src : & str ) -> Document {
1728
- let parser = Self :: make_parser ( ) ;
1740
+ let parser = if self . is_document {
1741
+ html:: parse_document ( RcDom :: default ( ) , html:: ParseOpts :: default ( ) )
1742
+ } else {
1743
+ Self :: make_parser ( )
1744
+ } ;
1729
1745
let dom = parser. one ( src) ;
1730
1746
self . clean_dom ( dom)
1731
1747
}
@@ -1788,7 +1804,10 @@ impl<'a> Builder<'a> {
1788
1804
. is_none( ) ) ;
1789
1805
}
1790
1806
for tag_name in & self . clean_content_tags {
1791
- assert ! ( !self . tags. contains( tag_name) , "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time" ) ;
1807
+ assert ! (
1808
+ !self . tags. contains( tag_name) ,
1809
+ "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time"
1810
+ ) ;
1792
1811
assert ! ( !self . tag_attributes. contains_key( tag_name) , "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time" ) ;
1793
1812
}
1794
1813
let body = {
0 commit comments