Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions wagtail_localize/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import re
import uuid

import polib
Expand Down Expand Up @@ -69,6 +70,9 @@
from .tasks import background


# Matches a run of one or more ASCII whitespace characters: space, tab,
# line feed, form feed, or carriage return.
WHITESPACE_RE = re.compile(r"[ \t\n\f\r]+")


def pk(obj):
"""
A helper that gets the primary key of a model instance if one is passed in.
Expand Down Expand Up @@ -1456,10 +1460,13 @@ def from_value(cls, locale, stringvalue):
Returns:
String: The String instance that corresponds with the given stringvalue and locale.
"""

data = re.sub(WHITESPACE_RE, " ", stringvalue.data)

string, created = cls.objects.get_or_create(
locale_id=pk(locale),
data_hash=cls._get_data_hash(stringvalue.data),
defaults={"data": stringvalue.data},
data_hash=cls._get_data_hash(data),
defaults={"data": data},
)

return string
Expand Down Expand Up @@ -1707,6 +1714,11 @@ def from_text(cls, translation_of, locale, context, data):
Returns:
String: The String instance that corresponds with the given stringvalue and locale.
"""

# Normalise each whitespace sequence to a single space, unless the whitespace is inside a <pre> tag,
# in which case it is left alone.
# This is in line with https://www.w3.org/TR/html4/struct/text.html#h-9.1

segment, created = cls.objects.get_or_create(
translation_of=translation_of,
locale_id=pk(locale),
Expand Down
25 changes: 25 additions & 0 deletions wagtail_localize/tests/test_translationsource_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,31 @@ def test_convert_alias(self):
)


class TestStringNormalization(TestCase):
    """Checks the normalisation helpers that StringValue exposes."""

    def test_whitespace_normalization(self):
        # Tabs and newlines in the input are expected to come back as spaces.
        raw = "This is a test with multiple spaces\tand\ttabs\nand\nnew\nlines"
        expected = "This is a test with multiple spaces and tabs and new lines"

        result = StringValue(raw).normalize_whitespace()

        self.assertEqual(result, expected)

    def test_unicode_normalization(self):
        # The accented character is expected to come back as plain ASCII.
        result = StringValue("Café").normalize_unicode()

        self.assertEqual(result, "Cafe")

    def test_combined_normalization(self):
        # Whitespace and unicode normalisation applied through one call.
        raw = "Café\twith\nnew\nlines\nand multiple spaces"
        expected = "Cafe with new lines and multiple spaces"

        result = StringValue(raw).normalize()

        self.assertEqual(result, expected)


class TestCreateOrUpdateTranslationForSnippet(TestCase):
def setUp(self):
self.snippet = TestSnippet.objects.create(
Expand Down