Skip to content

Commit c33c514

Browse files
authored
Merge pull request #1129 from sphinx-contrib/replace-special-v2-anchor-building
Replace special anchor handling
2 parents eb44821 + d8c6605 commit c33c514

File tree

7 files changed

+57
-49
lines changed

7 files changed

+57
-49
lines changed

sphinxcontrib/confluencebuilder/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,6 @@ def setup(app):
295295
cm.add_conf_bool('confluence_adv_cloud')
296296
# Disable any delays when publishing property updates on Cloud
297297
cm.add_conf_bool('confluence_adv_disable_cloud_prop_delay')
298-
# Disable workaround for: https://jira.atlassian.com/browse/CONFCLOUD-74698
299-
cm.add_conf_bool('confluence_adv_disable_confcloud_74698')
300-
# Disable workaround for inline-extension anchor injection
301-
cm.add_conf_bool('confluence_adv_disable_confcloud_ieaj')
302298
# Disable any attempts to initialize this extension's custom entities.
303299
cm.add_conf_bool('confluence_adv_disable_init')
304300
# Flag to permit the use of embedded certificates from requests.
@@ -342,6 +338,8 @@ def setup(app):
342338
# replaced by confluence_space_key
343339
cm.add_conf('confluence_space_name')
344340
# dropped
341+
cm.add_conf_bool('confluence_adv_disable_confcloud_74698')
342+
cm.add_conf_bool('confluence_adv_disable_confcloud_ieaj')
345343
cm.add_conf_int('confluence_max_doc_depth')
346344

347345
# ##########################################################################

sphinxcontrib/confluencebuilder/builder.py

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@
3737
from sphinxcontrib.confluencebuilder.storage.translator import ConfluenceStorageFormatTranslator
3838
from sphinxcontrib.confluencebuilder.transmute import doctree_transmute
3939
from sphinxcontrib.confluencebuilder.util import ConfluenceUtil
40+
from sphinxcontrib.confluencebuilder.util import ascii_quote
4041
from sphinxcontrib.confluencebuilder.util import detect_cloud
4142
from sphinxcontrib.confluencebuilder.util import extract_strings_from_file
4243
from sphinxcontrib.confluencebuilder.util import first
4344
from sphinxcontrib.confluencebuilder.util import handle_cli_file_subset
4445
from sphinxcontrib.confluencebuilder.writer import ConfluenceWriter
45-
from urllib.parse import quote
4646
import os
4747
import tempfile
4848
import time
@@ -1318,6 +1318,30 @@ def _register_doctree_targets(self, docname, doctree, title_track=None):
13181318
if last_title_postfix > 0:
13191319
title_target = f'{title_target}.{last_title_postfix}'
13201320

1321+
# The value of 'title_target' should now be what the name of the
1322+
# Confluence-header should be. However, there are a couple of cases
1323+
# where this (anchor) target cannot be used. In Confluence's v2
1324+
# editor, if a title includes non-basic characters, anchor links
1325+
# to the title can require encoding. Otherwise, Confluence will
1326+
# ignore/drop the provided anchors in the page. On top of
1327+
# this, if a section has some non-basic characters, Confluence (on
1328+
# v2) may also inject one or more `[inlineExtension]` string
1329+
# prefixes into a heading's identifier.
1330+
#
1331+
# Instead of dealing with any of this, if we detect a section is
1332+
# using any characters which may cause issues in building anchors
1333+
# or causing issues building links to these sections, we will not
1334+
# use the Confluence-generated identifier. Instead, we will pull
1335+
# the first section identifier value and use that as a target
1336+
# instead (as we now inject detected ids as anchor values in headers).
1337+
#
1338+
# See: https://jira.atlassian.com/browse/CONFCLOUD-74698
1339+
if title_target != ascii_quote(title_target):
1340+
if 'ids' in section_node:
1341+
old_target = title_target
1342+
title_target = first(node.parent['ids'])
1343+
self.verbose(f'target replace {title_target}: {old_target}')
1344+
13211345
# If this section is the (first) root section, register a target
13221346
# for a "root" anchor point. This is important for references that
13231347
# link to documents (e.g. `:doc:<>`). For example, if "page-a"
@@ -1359,43 +1383,6 @@ def _register_doctree_targets(self, docname, doctree, title_track=None):
13591383
self._register_target(editor, full_id, node_refid)
13601384

13611385
def _register_target(self, editor, refid, target):
1362-
# v2 editor does not link anchors with select characters;
1363-
# provide a workaround that url encodes targets
1364-
#
1365-
# See: https://jira.atlassian.com/browse/CONFCLOUD-74698
1366-
if not self.config.confluence_adv_disable_confcloud_74698:
1367-
if editor == 'v2':
1368-
# We originally encoded specific characters to prevent
1369-
# Confluence from suppressing anchors for select characters,
1370-
# but it is unknown the extensive list of characters Confluence
1371-
# was not happy with. We then switch to `quote` which worked
1372-
# for the most part, but when users used Emoji's, these
1373-
# characters would become encoded and generate anchor targets
1374-
# with incorrect values. Now, we do a partial quote in an
1375-
# attempt to be flexible -- we quote the standard ASCII range
1376-
# using Python default safe sets and anything beyond it, we
1377-
# will just leave as is.
1378-
def partial_quote(s):
1379-
chars = [quote(x) if ord(x) < 128 else x for x in s]
1380-
return ''.join(chars)
1381-
1382-
new_target = partial_quote(target)
1383-
1384-
# So... related to CONFCLOUD-74698, something about anchors
1385-
# with special characters will cause some pain for links.
1386-
# This has been observed in the past, was removed after
1387-
# thinking it was not an issue but is now being added again.
1388-
# It appears that when a header is generated an identifier in
1389-
# Confluence Cloud that has special characters, we can observe
1390-
# Confluence prefixing these identifiers with two copies of
1391-
# `[inlineExtension]`. Cannot explain why, so if this situation
1392-
# occurs, just add the prefix data to help ensure links work.
1393-
if not self.config.confluence_adv_disable_confcloud_ieaj:
1394-
if new_target != target:
1395-
new_target = 2 * '[inlineExtension]' + new_target
1396-
1397-
target = new_target
1398-
13991386
self.state.register_target(refid, target)
14001387

14011388
# For singleconfluence, register global fallbacks for targets

sphinxcontrib/confluencebuilder/transform.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,13 @@ def is_confluence_href_node(node):
3939
))
4040

4141
for refnode in self.document.findall(is_confluence_href_node):
42-
uri = refnode['confluence-params']['href']
42+
uri = refnode['confluence-params']['href'] # type: ignore[index]
4343

4444
if newuri := app.emit_firstresult('linkcheck-process-uri', uri):
4545
uri = newuri
4646

4747
try:
48-
lineno = get_node_line(refnode)
48+
lineno = get_node_line(refnode) # type: ignore[arg-type]
4949
except ValueError:
5050
lineno = -1
5151

sphinxcontrib/confluencebuilder/util.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from sphinxcontrib.confluencebuilder.std.confluence import FONT_X_HEIGHT
1212
from subprocess import check_call
1313
from hashlib import sha256
14+
from urllib.parse import quote
1415
from urllib.parse import urlparse
1516
import getpass
1617
import os
@@ -92,6 +93,28 @@ def normalize_base_url(url):
9293
return url
9394

9495

96+
def ascii_quote(text):
97+
"""
98+
quote the ascii character range of a string
99+
100+
This utility call will return a URL-quoted value of a string for all
101+
detected ASCII characters.
102+
103+
This is primarily used to help detect prospective anchor targets in Confluence
104+
where Confluence may silently remove anchors with unsupported characters
105+
(varies per editor).
106+
107+
Args:
108+
text: the text to quote
109+
110+
Returns:
111+
the quoted text
112+
"""
113+
114+
chars = [quote(x) if ord(x) < 128 else x for x in text]
115+
return ''.join(chars)
116+
117+
95118
def convert_length(value, unit, pct=True):
96119
"""
97120
convert a length value to a confluence-supported integer-equivalent value

tests/sample-sets/header-links/rst-v1-first.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ Vivamus iaculis, sapien ac blandit faucibus, mauris felis sodales enim, et facil
6666
6767
.. _main-rst-v1-extra2:
6868

69-
An Extra Header
70-
---------------
69+
An (Extra) Header
70+
-----------------
7171

7272
.. note::
7373

tests/sample-sets/header-links/rst-v2-first.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ Vivamus iaculis, sapien ac blandit faucibus, mauris felis sodales enim, et facil
6666
6767
.. _main-rst-v2-extra2:
6868

69-
An Extra Header
70-
---------------
69+
An (Extra) Header
70+
-----------------
7171

7272
.. note::
7373

tests/unit-tests/test_references_confluence.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,6 @@ def test_storage_references_confluence(self):
722722
self.assertIsNotNone(a_href)
723723
self.assertTrue(a_href.has_attr('href'))
724724
self.assertEqual(a_href['href'],
725-
'#Markdown-v2-Second-sub-heading-(jump-above)')
725+
'#markdown-v2-second-sub-heading-jump-above')
726726
self.assertEqual(a_href.text,
727727
'Markdown v2 Second sub-heading (jump above)')

0 commit comments

Comments
 (0)