|
37 | 37 | from sphinxcontrib.confluencebuilder.storage.translator import ConfluenceStorageFormatTranslator
|
38 | 38 | from sphinxcontrib.confluencebuilder.transmute import doctree_transmute
|
39 | 39 | from sphinxcontrib.confluencebuilder.util import ConfluenceUtil
|
| 40 | +from sphinxcontrib.confluencebuilder.util import ascii_quote |
40 | 41 | from sphinxcontrib.confluencebuilder.util import detect_cloud
|
41 | 42 | from sphinxcontrib.confluencebuilder.util import extract_strings_from_file
|
42 | 43 | from sphinxcontrib.confluencebuilder.util import first
|
43 | 44 | from sphinxcontrib.confluencebuilder.util import handle_cli_file_subset
|
44 | 45 | from sphinxcontrib.confluencebuilder.writer import ConfluenceWriter
|
45 |
| -from urllib.parse import quote |
46 | 46 | import os
|
47 | 47 | import tempfile
|
48 | 48 | import time
|
@@ -1318,6 +1318,30 @@ def _register_doctree_targets(self, docname, doctree, title_track=None):
|
1318 | 1318 | if last_title_postfix > 0:
|
1319 | 1319 | title_target = f'{title_target}.{last_title_postfix}'
|
1320 | 1320 |
|
| 1321 | + # The value of 'title_target' should now be what the name of the |
| 1322 | + # Confluence-header should be. However, there are a couple of cases |
| 1323 | + # where this (anchor) target cannot be used. In Confluence's v2 |
| 1324 | + # editor, if a title includes non-basic characters, anchor links |
| 1325 | + # to the title can require encoding. Otherwise, Confluence will |
| 1326 | + # ignore/drop the provided anchors in the page. On top of |
| 1327 | + # this, if a section has some non-basic characters, Confluence (on |
| 1328 | + # v2) may also inject one or more `[inlineExtension]` string |
| 1329 | + # prefixes into a heading's identifier. |
| 1330 | + # |
| 1331 | + # Instead of dealing with any of this, if we detect a section is |
| 1332 | + # using any characters which may cause issues in building anchors |
| 1333 | + # or causing issues building links to these sections, we will not |
| 1334 | + # use the Confluence-generated identifier. Instead, we will pull |
| 1335 | + # the first section identifier value and use that as a target |
| 1336 | + # instead (as we now inject detected ids as anchor values in headers). |
| 1337 | + # |
| 1338 | + # See: https://jira.atlassian.com/browse/CONFCLOUD-74698 |
| 1339 | + if title_target != ascii_quote(title_target): |
| 1340 | + if 'ids' in section_node: |
| 1341 | + old_target = title_target |
| 1342 | + title_target = first(node.parent['ids']) |
| 1343 | + self.verbose(f'target replace {title_target}: {old_target}') |
| 1344 | + |
1321 | 1345 | # If this section is the (first) root section, register a target
|
1322 | 1346 | # for a "root" anchor point. This is important for references that
|
1323 | 1347 | # link to documents (e.g. `:doc:<>`). For example, if "page-a"
|
@@ -1359,43 +1383,6 @@ def _register_doctree_targets(self, docname, doctree, title_track=None):
|
1359 | 1383 | self._register_target(editor, full_id, node_refid)
|
1360 | 1384 |
|
1361 | 1385 | def _register_target(self, editor, refid, target):
|
1362 |
| - # v2 editor does not link anchors with select characters; |
1363 |
| - # provide a workaround that url encodes targets |
1364 |
| - # |
1365 |
| - # See: https://jira.atlassian.com/browse/CONFCLOUD-74698 |
1366 |
| - if not self.config.confluence_adv_disable_confcloud_74698: |
1367 |
| - if editor == 'v2': |
1368 |
| - # We originally encoded specific characters to prevent |
1369 |
| - # Confluence from suppressing anchors for select characters, |
1370 |
| - # but it is unknown the extensive list of characters Confluence |
1371 |
| - # was not happy with. We then switch to `quote` which worked |
1372 |
| - # for the most part, but when users used Emoji's, these |
1373 |
| - # characters would become encoded and generate anchor targets |
1374 |
| - # with incorrect values. Now, we do a partial quote in an |
1375 |
| - # attempt to be flexible -- we quote the standard ASCII range |
1376 |
| - # using Python default safe sets and anything beyond it, we |
1377 |
| - # will just leave as is. |
1378 |
| - def partial_quote(s): |
1379 |
| - chars = [quote(x) if ord(x) < 128 else x for x in s] |
1380 |
| - return ''.join(chars) |
1381 |
| - |
1382 |
| - new_target = partial_quote(target) |
1383 |
| - |
1384 |
| - # So... related to CONFCLOUD-74698, something about anchors |
1385 |
| - # with special characters will cause some pain for links. |
1386 |
| - # This has been observed in the past, was removed after |
1387 |
| - # thinking it was not an issue but is now being added again. |
1388 |
| - # It appears that when a header is generated an identifier in |
1389 |
| - # Confluence Cloud that has special characters, we can observe |
1390 |
| - # Confluence prefixing these identifiers with two copies of |
1391 |
| - # `[inlineExtension]`. Cannot explain why, so if this situation |
1392 |
| - # occurs, just add the prefix data to help ensure links work. |
1393 |
| - if not self.config.confluence_adv_disable_confcloud_ieaj: |
1394 |
| - if new_target != target: |
1395 |
| - new_target = 2 * '[inlineExtension]' + new_target |
1396 |
| - |
1397 |
| - target = new_target |
1398 |
| - |
1399 | 1386 | self.state.register_target(refid, target)
|
1400 | 1387 |
|
1401 | 1388 | # For singleconfluence, register global fallbacks for targets
|
|
0 commit comments