Skip to content

Commit 247f0ec

Browse files
authored
Add strings validation helper that identifies duplicated keys (#342)
2 parents 5cf371e + 9b79539 commit 247f0ec

File tree

4 files changed

+258
-0
lines changed

4 files changed

+258
-0
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
module Fastlane
2+
module Helper
3+
module Ios
4+
class StringsFileValidationHelper
5+
# context can be one of:
6+
# :root, :maybe_comment_start, :in_line_comment, :in_block_comment,
7+
# :maybe_block_comment_end, :in_quoted_key,
8+
# :after_quoted_key_before_eq, :after_quoted_key_and_equal,
9+
# :in_quoted_value, :after_quoted_value
10+
State = Struct.new(:context, :buffer, :in_escaped_ctx, :found_key, keyword_init: true)
11+
12+
TRANSITIONS = {
13+
root: {
14+
/\s/ => :root,
15+
'/' => :maybe_comment_start,
16+
'"' => :in_quoted_key
17+
},
18+
maybe_comment_start: {
19+
'/' => :in_line_comment,
20+
/\*/ => :in_block_comment
21+
},
22+
in_line_comment: {
23+
"\n" => :root,
24+
/./ => :in_line_comment
25+
},
26+
in_block_comment: {
27+
/\*/ => :maybe_block_comment_end,
28+
/./m => :in_block_comment
29+
},
30+
maybe_block_comment_end: {
31+
'/' => :root,
32+
/./m => :in_block_comment
33+
},
34+
in_quoted_key: {
35+
'"' => lambda do |state, _|
36+
state.found_key = state.buffer.string.dup
37+
state.buffer.string = ''
38+
:after_quoted_key_before_eq
39+
end,
40+
/./ => lambda do |state, c|
41+
state.buffer.write(c)
42+
:in_quoted_key
43+
end
44+
},
45+
after_quoted_key_before_eq: {
46+
/\s/ => :after_quoted_key_before_eq,
47+
'=' => :after_quoted_key_and_eq
48+
},
49+
after_quoted_key_and_eq: {
50+
/\s/ => :after_quoted_key_and_eq,
51+
'"' => :in_quoted_value
52+
},
53+
in_quoted_value: {
54+
'"' => :after_quoted_value,
55+
/./m => :in_quoted_value
56+
},
57+
after_quoted_value: {
58+
/\s/ => :after_quoted_value,
59+
';' => :root
60+
}
61+
}.freeze
62+
63+
# Inspects the given `.strings` file for duplicated keys, returning them
64+
# if any.
65+
#
66+
# @param [String] file The path to the file to inspect.
67+
# @return [Hash<String, Array<Int>] Hash with the dublipcated keys.
68+
# Each element has the duplicated key (from the `.strings`) as key and an array of line numbers where the key occurs as value.
69+
def self.find_duplicated_keys(file:)
70+
keys_with_lines = Hash.new([])
71+
72+
state = State.new(context: :root, buffer: StringIO.new, in_escaped_ctx: false, found_key: nil)
73+
74+
File.readlines(file).each_with_index do |line, line_no|
75+
line.chars.each_with_index do |c, col_no|
76+
# Handle escaped characters at a global level. This is more
77+
# straightforward than having a `TRANSITIONS` table that account
78+
# for it.
79+
if state.in_escaped_ctx || c == '\\'
80+
# Just because we check for escaped characters at the global
81+
# level, it doesn't mean we allow them in every context.
82+
allowed_contexts_for_escaped_characters = %i[in_quoted_key in_quoted_value in_block_comment in_line_comment]
83+
raise "Found escaped character outside of allowed contexts on line #{line_no + 1} (current context: #{state.context})" unless allowed_contexts_for_escaped_characters.include?(state.context)
84+
85+
state.buffer.write(c) if state.context == :in_quoted_key
86+
state.in_escaped_ctx = !state.in_escaped_ctx
87+
next
88+
end
89+
90+
# Look at the transitions table for the current context, and find
91+
# the first transition matching the current character
92+
(_, next_context) = TRANSITIONS[state.context].find { |regex, _| c.match?(regex) } || [nil, nil]
93+
raise "Invalid character `#{c}` found on line #{line_no + 1}, col #{col_no + 1}" if next_context.nil?
94+
95+
state.context = next_context.is_a?(Proc) ? next_context.call(state, c) : next_context
96+
next unless state.found_key
97+
98+
# If we just exited the :in_quoted_key context and thus have found
99+
# a new key, process it
100+
key = state.found_key.dup
101+
state.found_key = nil
102+
103+
keys_with_lines[key] += [line_no + 1]
104+
end
105+
end
106+
107+
keys_with_lines.keep_if { |_, lines| lines.count > 1 }
108+
end
109+
end
110+
end
111+
end
112+
end
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
require 'spec_helper'
2+
3+
describe Fastlane::Helper::Ios::StringsFileValidationHelper do
4+
let(:test_data_dir) { File.join(File.dirname(__FILE__), 'test-data', 'translations') }
5+
6+
context 'when there is an escape character in the root context' do
7+
it 'raises' do
8+
input = File.join(test_data_dir, 'strings-with-escape-character-in-root-context.strings')
9+
expect { described_class.find_duplicated_keys(file: input) }
10+
.to raise_error(RuntimeError, 'Found escaped character outside of allowed contexts on line 8 (current context: root)')
11+
end
12+
end
13+
14+
context 'when there are duplicated keys' do
15+
it 'returns them in an array' do
16+
input = File.join(test_data_dir, 'file-with-duplicated-keys.strings')
17+
18+
expect(described_class.find_duplicated_keys(file: input)).to eq(
19+
{
20+
'dup1' => [30, 31],
21+
'dup2' => [33, 35],
22+
'dup3' => [36, 39],
23+
'dup4' => [41, 42],
24+
'\U0025 key' => [49, 50],
25+
'\U0026 key' => [52, 54],
26+
'key with \"%@\" character' => [60, 61],
27+
'key with \"%@\" but diff translations' => [63, 64],
28+
'key with multiple \"%@\" escapes \":)\" in it' => [66, 68],
29+
'Login to a \"%@\" account' => [67, 69],
30+
'key with trailing spaces ' => [76, 77],
31+
'key with \"%@\" and = character' => [71, 72],
32+
'key with \"%@\" character and equal in translation' => [73, 74],
33+
'key repeated more than twice' => [79, 80, 81]
34+
}
35+
)
36+
end
37+
end
38+
39+
context 'when there are no duplicated keys' do
40+
it 'returns an empty array' do
41+
# Piggy back on some of the `.strings` from other tests to ensure this
42+
# behaves correctly
43+
expect(
44+
described_class.find_duplicated_keys(
45+
file: File.join(test_data_dir, 'ios_l10n_helper', 'expected-merged.strings')
46+
)
47+
).to be_empty
48+
expect(
49+
described_class.find_duplicated_keys(
50+
file: File.join(test_data_dir, 'ios_extract_keys_from_strings_files', 'Resources', 'en.lproj', 'Localizable.strings')
51+
)
52+
).to be_empty
53+
end
54+
end
55+
end
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/* A comment */
2+
"key" = "localized translation here";
3+
/* A value with escaped characters */
4+
"key.key" = "Hello \"World\"!";
5+
/* Keys with escaped characters */
6+
"it's a \" = " = "trap";
7+
"it's another \\" = "trap";
8+
/* A multi-line value */
9+
"error.message" = "One line.
10+
Another line.";
11+
/* A multi-line
12+
comment
13+
*/
14+
"comment" = "comment";
15+
/* Below are two keys with leading spaces to test against malformatted files */
16+
"space" = "localized translation here";
17+
"more.space" = "localized translation here";
18+
/* Below are some entries with unusual spaces between key and value to test against malformed files */
19+
"nospace"="localized translation here";
20+
"space.nospace" ="localized translation here";
21+
"nospace.space"= "localized translation here";
22+
"lots of spaces" = "localized translation here";
23+
/* Special case: \"%@\" can be tricky to detect */
24+
"Key escaped parameter \"%@\"" = "localized translation here";
25+
"Something \"something\" something \"else\"" = "localized translation here";
26+
/*
27+
* Duplicated keys
28+
*/
29+
/* Consecutive duplicated keys */
30+
"dup1" = "localized translation here";
31+
"dup1" = "localized translation here";
32+
/* Duplicated keys with other entries in between */
33+
"dup2" = "localized translation here";
34+
"not dup" = "localized translation here";
35+
"dup2" = "localized translation here";
36+
"dup3" = "localized translation here";
37+
"not dup 2" = "localized translation here";
38+
"not dup 3" = "localized translation here";
39+
"dup3" = "localized translation here";
40+
/* Duplicated keys with different translations are still duplicated */
41+
"dup4" = "a translation";
42+
"dup4" = "a different translation";
43+
/* Duplicated comments should be ignored */
44+
"key with dup comment" = "localized translation here";
45+
/* Duplicated comments should be ignored */
46+
"different key with dup comment" = "localized translation here";
47+
/* Unicode codepoint escape — see https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/LoadingResources/Strings/Strings.html#//apple_ref/doc/uid/10000051i-CH6-SW13 */
48+
/* Consecutive... */
49+
"\U0025 key" = "\U0025 is the % symbol";
50+
"\U0025 key" = "\U0025 is the % symbol";
51+
/* ...and not consecutive */
52+
"\U0026 key" = "\U0025 is the & symbol";
53+
"unicode\U0020key" = "\U0020 is the space character";
54+
"\U0026 key" = "\U0025 is the & symbol";
55+
/* Special case: \"%@\" can be tricky to detect */
56+
"The Google account \"%@\" doesn't match any account on WordPress.com" = "localized translation here";
57+
/* A red herring that might be seen as duplicate to the previous one if our RegExp does not correctly account for escaped quotes */
58+
"The Google account \"%@\" is invalid" = "localized translation here";
59+
/* Duplicated keys that include escaped quotes */
60+
"key with \"%@\" character" = "localized translation here";
61+
"key with \"%@\" character" = "localized translation here";
62+
/* Red herring: duplicated keys which include quotes and equal sign in their translations, making the RegExp mismatch all the preceding portion as being all part of the key */
63+
"key with \"%@\" but diff translations" = "localized translation 1 for \"%@\" = 1";
64+
"key with \"%@\" but diff translations" = "localized translation 2 for \"%@\" = 2";
65+
/* Duplicated keys that include escaped quotes, interleaved */
66+
"key with multiple \"%@\" escapes \":)\" in it" = "localized translation here";
67+
"Login to a \"%@\" account" = "Login to a \"%@\" account"; // Here %@ = the service name, like Google
68+
"key with multiple \"%@\" escapes \":)\" in it" = "localized translation here";
69+
"Login to a \"%@\" account" = "Login to a \"%@\" account"; // Here %@ = the service name, like Google
70+
/* Duplicated keys with escaped quotes and equals */
71+
"key with \"%@\" and = character" = "localized translation here";
72+
"key with \"%@\" and = character" = "localized translation here";
73+
"key with \"%@\" character and equal in translation" = "localized = translation here";
74+
"key with \"%@\" character and equal in translation" = "localized = translation here";
75+
/* Duplicated keys with trailing spaces */
76+
"key with trailing spaces " = "localized translation here";
77+
"key with trailing spaces " = "localized translation here";
78+
/* What happens if a key is repeated more than twice? */
79+
"key repeated more than twice" = "a translation";
80+
"key repeated more than twice" = "a translation";
81+
"key repeated more than twice" = "a translation";
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* This file has characters root context (not in a comment, or key, or value), which is not a valid `.strings` syntax */
2+
"key" = "value";
3+
"other key" = "other value";
4+
5+
/* Escaped characters should be allowed in block comments \\ */
6+
"yet another key" = "yet another value"; // Escapes should be allowed in inline comments, too \\
7+
8+
\\
9+
/* The parsing shouldn't reach this line. It should tell us the escape up there is not valid. */
10+
"final key" = "final value";

0 commit comments

Comments
 (0)