Skip to content

Commit 716d8ca

Browse files
committed
autodoc: Change dictionary sort order
This makes this more in line with Data::Dumper sorting. upper/lower case continues to not matter, and numbers continue to come after letters, so that ckWARN2() comes after plain ckWARN(). It changes non-leading underscores to come before letters, so that ck_warner comes before ckWARN. And it changes so leading underscores come after non-leading, so that aMY_CXT and aMY_CXT_ come before _aMY_CXT.
1 parent e8196e9 commit 716d8ca

File tree

1 file changed

+29
-33
lines changed

1 file changed

+29
-33
lines changed

autodoc.pl

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#!/usr/bin/perl -w
22

3+
use v5.41;
4+
no feature 'signatures'; # For the time being, to avoid converting
35
use Text::Tabs;
46

57
# Unconditionally regenerate:
@@ -2195,48 +2197,42 @@ sub dictionary_order {
21952197

21962198
no warnings 'non_unicode';
21972199

2198-
local $a = $a;
2199-
local $b = $b;
2200+
my $mod_string_for_dictionary_order = sub {
2201+
my $string = shift;
22002202

2201-
# Convert all digit sequences to same length with leading zeros, so for
2202-
# example, 8 will compare less than 16 (using a fill length value that
2203-
# should be longer than any sequence in the input).
2204-
$a =~ s/(\d+)/sprintf "%06d", $1/ge;
2205-
$b =~ s/(\d+)/sprintf "%06d", $1/ge;
2203+
# Convert all digit sequences to be the same length with leading
2204+
# zeros, so that, for example '8' will sort before '16' (using a fill
2205+
# length value that should be longer than any sequence in the input).
2206+
$string =~ s/(\d+)/sprintf "%06d", $1/ge;
22062207

2207-
# Translate any underscores and digits so they compare after all Unicode
2208-
# characters
2209-
$a =~ tr[_0-9]/\x{110000}-\x{11000A}/;
2210-
$b =~ tr[_0-9]/\x{110000}-\x{11000A}/;
2208+
# Translate any underscores so they sort lowest. This causes
2209+
# 'word1_word2' to sort before 'word1word2' for all words. And
2210+
# translate any digits so they come after anything else. This causes
2211+
# digits to sort highest)
2212+
$string =~ tr[_0-9]/\0\x{110000}-\x{110009}/;
22112213

2212-
use feature 'state';
2213-
# Modify \w, \W to reflect the changes.
2214-
state $ud = '\x{110000}-\x{11000A}'; # xlated underscore, digits
2215-
state $w = "\\w$ud"; # new \w string
2216-
state $mod_w = qr/[$w]/;
2217-
state $mod_W = qr/[^$w]/;
2214+
# Then move leading underscores to the end, translating them to above
2215+
# everything else. This causes '_word_' to compare just after 'word_'
2216+
$string .= "\x{11000A}" x length $1 if $string =~ s/ ^ (\0+) //x;
22182217

2219-
# Only \w for initial comparison
2220-
my $a_only_word = uc($a =~ s/$mod_W//gr);
2221-
my $b_only_word = uc($b =~ s/$mod_W//gr);
2218+
return $string;
2219+
};
22222220

2223-
# And not initial nor interior underscores nor digits (by squeezing them
2224-
# out)
2225-
my $a_stripped = $a_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
2226-
my $b_stripped = $b_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
2221+
# Modify \w, \W to reflect what the above sub does.
2222+
state $w = "\\w\0\x{110000}-\x{11000A}"; # new \w string
2223+
state $mod_w = qr/[$w]/;
2224+
state $mod_W = qr/[^$w]/;
22272225

2228-
# If the stripped versions differ, use that as the comparison.
2229-
my $cmp = $a_stripped cmp $b_stripped;
2230-
return $cmp if $cmp;
2226+
local $a = $mod_string_for_dictionary_order->($a);
2227+
local $b = $mod_string_for_dictionary_order->($b);
22312228

2232-
# For the first tie breaker, repeat, but consider initial and interior
2233-
# underscores and digits, again having those compare after all Unicode
2234-
# characters
2235-
$cmp = $a_only_word cmp $b_only_word;
2229+
# If the strings stripped of \W differ, use that as the comparison.
2230+
my $cmp = lc ($a =~ s/$mod_W//gr) cmp lc ($b =~ s/$mod_W//gr);
22362231
return $cmp if $cmp;
22372232

2238-
# Next tie breaker is just a caseless comparison
2239-
$cmp = uc($a) cmp uc($b);
2233+
# For the first tie breaker use a plain caseless comparison of the
2234+
# modified strings
2235+
$cmp = lc $a cmp lc $b;
22402236
return $cmp if $cmp;
22412237

22422238
# Finally a straight comparison

0 commit comments

Comments
 (0)