Merge 2023-02 LWG Motion 11

tkoeppe · web-flow · commit 368bc96f500d · 2023-03-11T15:57:58.000Z
P2572R1 std::format fill character allowances
diff --git a/source/utilities.tex b/source/utilities.tex
@@ -14774,7 +14774,7 @@
 \fmtgrammarterm{std-format-spec}.
 \begin{note}
 The format specification can be used to specify such details as
-field width, alignment, padding, and decimal precision.
+minimum field width, alignment, padding, and decimal precision.
 Some of the formatting options
 are only supported for arithmetic types.
 \end{note}
@@ -14823,63 +14823,108 @@
 \end{ncbnf}
 
 \pnum
+Field widths are specified in \defnadj{field width}{units};
+the number of column positions required to display a sequence of
+characters in a terminal.
+The \defnadj{minimum}{field width}
+is the number of field width units a replacement field minimally requires of
+the formatted sequence of characters produced for a format argument.
+The \defnadj{estimated}{field width} is the number of field width units
+that are required for the formatted sequence of characters
+produced for a format argument independent of
+the effects of the \fmtgrammarterm{width} option.
+The \defnadj{padding}{width} is the greater of \tcode{0} and
+the difference of the minimum field width and the estimated field width.
+
+\begin{note}
+The POSIX \tcode{wcswidth} function is an example of a function that,
+given a string, returns the number of column positions required by
+a terminal to display the string.
+\end{note}
+
+\pnum
+The \defnadj{fill}{character} is the character denoted by
+the \fmtgrammarterm{fill} option or,
+if the \fmtgrammarterm{fill} option is absent, the space character.
+For a format specification in UTF-8, UTF-16, or UTF-32,
+the fill character corresponds to a single Unicode scalar value.
 \begin{note}
-The \fmtgrammarterm{fill} character can be any character
-other than \tcode{\{} or \tcode{\}}.
-The presence of a fill character is signaled by the
-character following it, which must be one of the alignment options.
+The presence of a \fmtgrammarterm{fill} option
+is signaled by the character following it,
+which must be one of the alignment options.
 If the second character of \fmtgrammarterm{std-format-spec}
 is not a valid alignment option,
-then it is assumed that both the fill character and the alignment option are
-absent.
+then it is assumed that
+the \fmtgrammarterm{fill} and \fmtgrammarterm{align} options
+are both absent.
 \end{note}
 
 \pnum
-The \fmtgrammarterm{align} specifier applies to all argument types.
+The \fmtgrammarterm{align} option applies to all argument types.
 The meaning of the various alignment options is as specified in \tref{format.align}.
 \begin{example}
+%FIXME: example is incomplete, sB and sC result in:
+%Error: Invalid UTF-8 byte sequence.
 \begin{codeblock}
 char c = 120;
-string s0 = format("{:6}", 42);         // value of \tcode{s0} is \tcode{"\ \ \ \ 42"}
-string s1 = format("{:6}", 'x');        // value of \tcode{s1} is \tcode{"x\ \ \ \ \ "}
-string s2 = format("{:*<6}", 'x');      // value of \tcode{s2} is \tcode{"x*****"}
-string s3 = format("{:*>6}", 'x');      // value of \tcode{s3} is \tcode{"*****x"}
-string s4 = format("{:*^6}", 'x');      // value of \tcode{s4} is \tcode{"**x***"}
-string s5 = format("{:6d}", c);         // value of \tcode{s5} is \tcode{"\ \ \ 120"}
-string s6 = format("{:6}", true);       // value of \tcode{s6} is \tcode{"true\ \ "}
+string s0 = format("{:6}", 42);             // value of \tcode{s0} is \tcode{"\ \ \ \ 42"}
+string s1 = format("{:6}", 'x');            // value of \tcode{s1} is \tcode{"x\ \ \ \ \ "}
+string s2 = format("{:*<6}", 'x');          // value of \tcode{s2} is \tcode{"x*****"}
+string s3 = format("{:*>6}", 'x');          // value of \tcode{s3} is \tcode{"*****x"}
+string s4 = format("{:*^6}", 'x');          // value of \tcode{s4} is \tcode{"**x***"}
+string s5 = format("{:6d}", c);             // value of \tcode{s5} is \tcode{"\ \ \ 120"}
+string s6 = format("{:6}", true);           // value of \tcode{s6} is \tcode{"true\ \ "}
+string s7 = format("{:*<6.3}", "123456");   // value of \tcode{s7} is \tcode{"123***"}
+string s8 = format("{:02}", 1234);          // value of \tcode{s8} is \tcode{"1234"}
+string s9 = format("{:*<}", "12");          // value of \tcode{s9} is \tcode{"12"}
+string sA = format("{:*<6}", "12345678");   // value of \tcode{sA} is \tcode{"12345678"}
 \end{codeblock}
 \end{example}
 \begin{note}
-Unless a minimum field width is defined, the field width is determined by
-the size of the content and the alignment option has no effect.
+The \fmtgrammarterm{fill}, \fmtgrammarterm{align}, and \tcode{0} options
+have no effect when the minimum field width
+is not greater than the estimated field width
+because padding width is \tcode{0} in that case.
+Since fill characters are assumed to have a field width of \tcode{1},
+use of a character with a different field width can produce misaligned output.
+%FIXME: cannot show clown face character below.
+The \unicode{1f921}{clown face} character has a field width of \tcode{2}.
+The examples above that include that character
+illustrate the effect of the field width
+when that character is used as a fill character
+as opposed to when it is used as a formatting argument.
 \end{note}
 
 \begin{floattable}{Meaning of \fmtgrammarterm{align} options}{format.align}{lp{.8\hsize}}
 \topline
 \lhdr{Option} & \rhdr{Meaning} \\ \rowsep
 \tcode{<} &
-Forces the field to be aligned to the start of the available space.
+Forces the formatted argument to be aligned to the start of the field
+by inserting $n$ fill characters after the formatted argument
+where $n$ is the padding width.
 This is the default for
 non-arithmetic non-pointer types, \tcode{charT}, and \tcode{bool},
 unless an integer presentation type is specified.
 \\ \rowsep
 %
 \tcode{>} &
-Forces the field to be aligned to the end of the available space.
+Forces the formatted argument to be aligned to the end of the field
+by inserting $n$ fill characters before the formatted argument
+where $n$ is the padding width.
 This is the default for
 arithmetic types other than \tcode{charT} and \tcode{bool},
 pointer types,
 or when an integer presentation type is specified.
 \\ \rowsep
 %
 \tcode{\caret} &
-Forces the field to be centered within the available space
+Forces the formatted argument to be centered within the field
 by inserting
 $\bigl\lfloor \frac{n}{2} \bigr\rfloor$
-characters before and
+fill characters before and
 $\bigl\lceil \frac{n}{2} \bigr\rceil$
-characters after the value, where
-$n$ is the total number of fill characters to insert.
+fill characters after the formatted argument, where
+$n$ is the padding width.
 \\
 \end{floattable}
 
@@ -14955,50 +15000,45 @@
 trailing zeros are not removed from the result.
 
 \pnum
-A zero (\tcode{0}) character
-preceding the \fmtgrammarterm{width} field
-pads the field with leading zeros (following any indication of sign or base)
-to the field width,
-except when applied to an infinity or NaN.
-This option is only valid for
-arithmetic types other than \tcode{charT} and \tcode{bool}
-or when an integer presentation type is specified.
-If the \tcode{0} character and an \fmtgrammarterm{align} option both appear,
-the \tcode{0} character is ignored.
+The \tcode{0} option is valid for arithmetic types
+other than \tcode{charT} and \tcode{bool} or
+when an integer presentation type is specified.
+For formatting arguments that have a value
+other than an infinity or a NaN,
+this option pads the formatted argument by
+inserting the \tcode{0} character $n$ times
+following the sign or base prefix indicators (if any)
+where $n$ is \tcode{0} if the \fmtgrammarterm{align} option is present and
+is the padding width otherwise.
 \begin{example}
 \begin{codeblock}
 char c = 120;
 string s1 = format("{:+06d}", c);       // value of \tcode{s1} is \tcode{"+00120"}
 string s2 = format("{:#06x}", 0xa);     // value of \tcode{s2} is \tcode{"0x000a"}
-string s3 = format("{:<06}", -42);      // value of \tcode{s3} is \tcode{"-42\ \ \ "} (\tcode{0} is ignored because of \tcode{<} alignment)
+string s3 = format("{:<06}", -42);      // value of \tcode{s3} is \tcode{"-42\ \ \ "} (\tcode{0} has no effect)
+string s4 = format("{:06}", inf);       // value of \tcode{s4} is \tcode{"\ \ \ inf"} (\tcode{0} has no effect)
 \end{codeblock}
 \end{example}
 
+\pnum
+The \fmtgrammarterm{width} option specifies the minimum field width.
+If the \fmtgrammarterm{width} option is absent,
+the minimum field width is \tcode{0}.
+
 \pnum
 If \tcode{\{ \opt{\fmtgrammarterm{arg-id}} \}} is used in
-a \fmtgrammarterm{width} or \fmtgrammarterm{precision},
-the value of the corresponding formatting argument is used in its place.
+a \fmtgrammarterm{width} or \fmtgrammarterm{precision} option,
+the value of the corresponding formatting argument is used as the value of the option.
 If the corresponding formatting argument is
 not of standard signed or unsigned integer type, or
 its value is negative,
 an exception of type \tcode{format_error} is thrown.
 
 \pnum
 % FIXME: What if it's an arg-id?
-The \fmtgrammarterm{positive-integer} in
-\fmtgrammarterm{width} is a decimal integer defining the minimum field width.
-If \fmtgrammarterm{width} is not specified,
-there is no minimum field width, and
-the field width is determined based on the content of the field.
-
-\pnum
-\indextext{string!width}%
-The \defn{width} of a string is defined as
-the estimated number of column positions appropriate
-for displaying it in a terminal.
-\begin{note}
-This is similar to the semantics of the POSIX \tcode{wcswidth} function.
-\end{note}
+If \fmtgrammarterm{positive-integer} is used in a
+\fmtgrammarterm{width} option, the value of the decimal integer
+is used as the value of the option.
 
 \pnum
 For the purposes of width computation,
@@ -15019,44 +15059,45 @@
 \end{note}
 
 \pnum
-For a string in UTF-8, UTF-16, or UTF-32,
-implementations should estimate the width of a string
-as the sum of estimated widths of
-the first code points in its extended grapheme clusters.
-The extended grapheme clusters of a string are defined by \UAX{29}.
-The estimated width of the following code points is 2:
+For a sequence of characters in UTF-8, UTF-16, or UTF-32,
+an implementation should use as its field width
+the sum of the field widths of the first code point
+of each extended grapheme cluster.
+Extended grapheme clusters are defined by \UAX{29} of the Unicode Standard.
+The following code points have a field width of 2:
 \begin{itemize}
 \item
-Any code point with the \tcode{East_Asian_Width="W"} or
+any code point with the \tcode{East_Asian_Width="W"} or
 \tcode{East_Asian_Width="F"} Derived Extracted Property as described by
-\UAX{44}
+\UAX{44} of the Unicode Standard
 \item
 \ucode{4dc0} -- \ucode{4dff} (Yijing Hexagram Symbols)
 \item
 \ucode{1f300} -- \ucode{1f5ff} (Miscellaneous Symbols and Pictographs)
 \item
 \ucode{1f900} -- \ucode{1f9ff} (Supplemental Symbols and Pictographs)
 \end{itemize}
-The estimated width of other code points is 1.
+The field width of all other code points is 1.
 
 \pnum
-For a string in neither UTF-8, UTF-16, nor UTF-32,
-the width of a string is unspecified.
+For a sequence of characters in neither UTF-8, UTF-16, nor UTF-32,
+the field width is unspecified.
 
 \pnum
-% FIXME: What if it's an arg-id?
-The \fmtgrammarterm{nonnegative-integer} in
-\fmtgrammarterm{precision} is a decimal integer defining
-the precision or maximum field size.
-It can only be used with floating-point and string types.
-For floating-point types this field specifies the formatting precision.
-For string types, this field provides an upper bound
-for the estimated width of the prefix of
-the input string that is copied into the output.
-For a string in UTF-8, UTF-16, or UTF-32,
-the formatter copies to the output
-the longest prefix of whole extended grapheme clusters
-whose estimated width is no greater than the precision.
+The \fmtgrammarterm{precision} option is valid
+for floating-point and string types.
+For floating-point types,
+the value of this option specifies the precision
+to be used for the floating-point presentation type.
+For string types,
+this option specifies the longest prefix of the formatted argument
+to be included in the replacement field such that
+the field width of the prefix is no greater than the value of this option.
+
+\pnum
+If \fmtgrammarterm{nonnegative-integer} is used in
+a \fmtgrammarterm{precision} option,
+the value of the decimal integer is used as the value of the option.
 
 \pnum
 When the \tcode{L} option is used, the form used for the conversion is called