|
158 | 158 | <link rel="canonical" href="/web/20131226101010oe_/http://example.com/">
|
159 | 159 |
|
160 | 160 | # rel=canonical: no_rewrite
|
161 |
| ->>> parse('<link rel=canonical href="http://example.com/">', urlrewriter=no_base_canon_rewriter) |
162 |
| -<link rel="canonical" href="http://example.com/"> |
| 161 | +>>> parse('<link rel=canonical href="http://example.com/canon/path">', urlrewriter=no_base_canon_rewriter) |
| 162 | +<link rel="canonical" href="http://example.com/canon/path"> |
| 163 | +
|
| 164 | +# rel=canonical: no_rewrite |
| 165 | +>>> parse('<link rel=canonical href="/relative/path">', urlrewriter=no_base_canon_rewriter) |
| 166 | +<link rel="canonical" href="http://example.com/relative/path"> |
163 | 167 |
|
164 | 168 | # doctype
|
165 | 169 | >>> parse('<!doctype html PUBLIC "public">')
|
|
210 | 214 | import pprint
|
211 | 215 | import urllib
|
212 | 216 |
|
213 |
| -urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', |
214 |
| - '/web/', |
215 |
| - rewrite_opts=dict(punycode_links=False)) |
| 217 | +ORIGINAL_URL = 'http://example.com/some/path/index.html' |
| 218 | + |
| 219 | +def new_rewriter(prefix='/web/', rewrite_opts=dict()): |
| 220 | + PROXY_PATH = '20131226101010/{0}'.format(ORIGINAL_URL) |
| 221 | + return UrlRewriter(PROXY_PATH, prefix, rewrite_opts=rewrite_opts) |
216 | 222 |
|
217 |
| -full_path_urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', |
218 |
| - 'http://localhost:80/web/', |
219 |
| - rewrite_opts=dict(punycode_links=False)) |
| 223 | +urlrewriter = new_rewriter(rewrite_opts=dict(punycode_links=False)) |
220 | 224 |
|
221 |
| -urlrewriter_pencode = UrlRewriter('20131226101010/http://example.com/some/path/index.html', |
222 |
| - '/web/', |
223 |
| - rewrite_opts=dict(punycode_links=True)) |
| 225 | +full_path_urlrewriter = new_rewriter(prefix='http://localhost:80/web/', |
| 226 | + rewrite_opts=dict(punycode_links=False)) |
224 | 227 |
|
| 228 | +urlrewriter_pencode = new_rewriter(rewrite_opts=dict(punycode_links=True)) |
225 | 229 |
|
226 |
| -no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', |
227 |
| - '/web/', |
228 |
| - rewrite_opts=dict(rewrite_rel_canon=False, |
229 |
| - rewrite_base=False)) |
| 230 | +no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False, |
| 231 | + rewrite_base=False)) |
230 | 232 |
|
231 | 233 | def parse(data, head_insert=None, urlrewriter=urlrewriter):
|
232 |
| - parser = HTMLRewriter(urlrewriter, head_insert = head_insert) |
| 234 | + parser = HTMLRewriter(urlrewriter, head_insert = head_insert, url = ORIGINAL_URL) |
233 | 235 |
|
234 | 236 | if isinstance(data, unicode):
|
235 | 237 | data = data.encode('utf-8')
|
|
0 commit comments