Skip to content

Commit ea6911a

Browse files
authored
Merge pull request #43 from shaarli/hotfix/sanatizing-improvements
Improve sanitizing regexp
2 parents b9d4141 + 59093c8 commit ea6911a

File tree

3 files changed

+58
-3
lines changed

3 files changed

+58
-3
lines changed

NetscapeBookmarkParser.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ public static function sanitizeString($bookmarkString)
262 262
$sanitized = $bookmarkString;
263 263

264 264
// trim comments
265-
$sanitized = preg_replace('@<!--.*-->@mis', '', $sanitized);
265+
$sanitized = preg_replace('@<!--.*?-->@mis', '', $sanitized);
266 266

267 267
// keep one XML element per line to prepare for linear parsing
268 268
$sanitized = preg_replace('@>(\s*?)<@mis', ">\n<", $sanitized);
@@ -279,9 +279,9 @@ public static function sanitizeString($bookmarkString)
279 279
// convert multiline descriptions to one-line descriptions
280 280
// line feeds are converted to <br>
281 281
$sanitized = preg_replace_callback(
282-
'@<DD>(.*?)<@mis',
282+
'@<DD>(.*?)(</?(:?DT|DD|DL))@mis',
283 283
function($match) {
284-
return '<DD>'.str_replace("\n", '<br>', trim($match[1])).PHP_EOL.'<';
284+
return '<DD>'.str_replace("\n", '<br>', trim($match[1])).PHP_EOL. $match[2];
285 285
},
286 286
$sanitized
287 287
);

tests/ParseDeliciousBookmarksTest.php

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,43 @@ public function testParse()
59 59
);
60 60
$this->assertEquals('1412085559', $bkm[4]['time']);
61 61
}
62+
63+
/**
64+
* Make sure that the sanitizing function doesn't strip content
65+
*/
66+
public function testParseStrictSanitizing()
67+
{
68+
$parser = new NetscapeBookmarkParser();
69+
$bkm = $parser->parseFile('tests/input/delicious_sanitize.htm');
70+
$this->assertEquals(2, sizeof($bkm));
71+
72+
$this->assertEquals(
73+
'Text
74+
<li>#CLE ---> #VALEUR</li>
75+
</BOUCLE_exploiter>
76+
</code>',
77+
$bkm[0]['note']
78+
);
79+
$this->assertEquals('1', $bkm[0]['pub']);
80+
$this->assertEquals('1380651656', $bkm[0]['time']);
81+
$this->assertEquals('http://spip.pastebin.fr/28921', $bkm[0]['uri']);
82+
$this->assertEquals(
83+
'spip pastebin - outil de debug collaboratif - Bonjour les écureuils !',
84+
$bkm[0]['title']
85+
);
86+
$this->assertEquals('spip3 astuces ', $bkm[0]['tags']);
87+
88+
$this->assertEquals('1', $bkm[1]['pub']);
89+
$this->assertEquals('1380651611', $bkm[1]['time']);
90+
$this->assertEquals(
91+
'http://www.la-grange.net/2013/09/07/changement',
92+
$bkm[1]['uri']
93+
);
94+
$this->assertEquals('Changer le monde - Carnets Web de La Grange', $bkm[1]['title']);
95+
$this->assertEquals(
96+
'La juxtaposition des mots propriétés et intellectuel (du monde des idées) '
97+
.'est une aberration dans un contexte de l\'échange et de la culture.',
98+
$bkm[1]['note']
99+
);
100+
}
62 101
}

tests/input/delicious_sanitize.htm

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)