RFR: 8316704: Regex-free parsing of Formatter and FormatProcessor specifiers [v12]

温绍锦 duke at openjdk.org
Thu Sep 28 14:00:30 UTC 2023


On Wed, 27 Sep 2023 19:51:25 GMT, Raffaello Giulietti <rgiulietti at openjdk.org> wrote:

>> 温绍锦 has updated the pull request incrementally with one additional commit since the last revision:
>> 
>>   fix : the exception thrown when the input does not include conversion is different from baseline.
>
> You might consider this alternative, which IMO is simpler and more readable.
> 
> 
>         int parse() {
>             // %[argument_index$][flags][width][.precision][t]conversion
>             // %(\d+$)?([-#+ 0,(<]*)?(\d+)?(.\d+)?([tT])?([a-zA-Z%])
>             parseArgument();
>             parseFlag();
>             parseWidth();
>             int precisionSize = parsePrecision();
>             if (precisionSize < 0) {
>                 return 0;
>             }
> 
>             // ([tT])?([a-zA-Z%])
>             char t = '\0', conversion = '\0';
>             if ((c == 't' || c == 'T') && off + 1 < max) {
>                 char c1 = s.charAt(off + 1);
>                 if (isConversion(c1)) {
>                     t = c;
>                     conversion = c1;
>                     off += 2;
>                 }
>             }
>             if (conversion == '\0' && isConversion(c)) {
>                 conversion = c;
>                 ++off;
>             }
> 
>             if (argSize + flagSize + widthSize + precisionSize + t + conversion != 0) {
>                 if (al != null) {
>                     FormatSpecifier formatSpecifier
>                             = new FormatSpecifier(s, start, argSize, flagSize, widthSize, precisionSize, t, conversion);
>                     al.add(formatSpecifier);
>                 }
>                 return off - start;
>             }
>             return 0;
>         }
> 
>         private void parseArgument() {
>             // (\d+$)?
>             int i = off;
>             for (; i < max && isDigit(c = s.charAt(i)); ++i);  // empty body
>             if (i == max || c != '$') {
>                 c = first;
>                 return;
>             }
>             ++i;  // skip '$'
>             if (i < max) {
>                 c = s.charAt(i);
>             }
>             argSize = i - off;
>             off = i;
>         }
> 
>         private void parseFlag() {
>             // ([-#+ 0,(<]*)?
>             int i = off;
>             for (; i < max && Flags.isFlag(c = s.charAt(i)); ++i);  // empty body
>             flagSize = i - off;
>             off = i;
>         }
> 
>         private void parseWidth() {
>             // (\d+)?
>             int i = off;
>             for (; i < max && isDigit(c = s.charAt(i)); ++i);  // empty body
>             widthSize = i - off;
>             off = i;
>         }
> 
>         private int parsePrecision() {
>             // (.\d+)?
>             if (c != '.') {
>                 return 0;
>             }
>             int i = off + 1;
>             for (; i < max && isDigit(c = s.charAt(i)); ++i);  // empty bod...

@rgiulietti @cl4es Sorry, I plan to make one more change. Currently, part of the parsing logic lives in the constructor of FormatSpecifier, which makes the code unclear. I plan to move that parsing logic out of the FormatSpecifier constructor and into FormatSpecifierParser. This will make the code clearer and also faster.

* now

        /**
         * Creates a specifier from a format string and the lengths of its
         * consecutive sections.
         *
         * <p>The sections are assumed to sit back to back in {@code s},
         * beginning at offset {@code i}, in the order argument index, flags,
         * width, precision. Every section whose length is positive is passed
         * on, as a start offset and an end offset (start plus length), to the
         * matching helper ({@code index}, {@code flags}, {@code width},
         * {@code precision}); sections of non-positive length are skipped.
         *
         * @param s             the format string containing the specifier
         * @param i             offset in {@code s} at which the first section starts
         * @param argSize       length of the argument-index section
         * @param flagSize      length of the flags section
         * @param widthSize     length of the width section
         * @param precisionSize length of the precision section
         * @param t             the optional prefix character preceding the
         *                      conversion, or {@code '\0'} when there is none;
         *                      {@code 'T'} additionally adds {@code Flags.UPPERCASE}
         * @param conversion    the conversion character, forwarded to
         *                      {@code conversion(char)}
         */
        FormatSpecifier(
                String s,
                int i,
                int argSize,
                int flagSize,
                int widthSize,
                int precisionSize,
                char t,
                char conversion
        ) {
            // Each section begins exactly where the previous one stops.
            final int flagStart = i + argSize;
            final int widthStart = flagStart + flagSize;
            final int precisionStart = widthStart + widthSize;
            final int precisionEnd = precisionStart + precisionSize;

            if (argSize > 0) {
                index(s, i, flagStart);
            }
            if (flagSize > 0) {
                flags(s, flagStart, widthStart);
            }
            if (widthSize > 0) {
                width(s, widthStart, precisionStart);
            }
            if (precisionSize > 0) {
                precision(s, precisionStart, precisionEnd);
            }

            // Any prefix character switches the dt field on; only the
            // upper-case variant also contributes the UPPERCASE flag.
            final boolean hasPrefix = t != '\0';
            if (hasPrefix) {
                dt = true;
            }
            if (t == 'T') {
                flags = Flags.add(flags, Flags.UPPERCASE);
            }

            conversion(conversion);
            check();
        }


* plan to:

        /**
         * Creates a specifier from values that have already been parsed,
         * so that this constructor no longer inspects the format string itself.
         *
         * @param index      argument index; stored only when positive
         * @param flags      flag bits; stored only when positive
         * @param width      field width; stored only when positive
         * @param precision  precision; always stored as given
         * @param t          the optional prefix character preceding the
         *                   conversion, or {@code '\0'} when there is none;
         *                   {@code 'T'} additionally adds {@code Flags.UPPERCASE}
         * @param conversion the conversion character, forwarded to
         *                   {@code conversion(char)}
         */
        FormatSpecifier(int index, int flags, int width, int precision, char t, char conversion) {
            // Non-positive values leave the corresponding field at whatever
            // value it was initialized with (initializers are not shown here).
            if (index > 0) {
                this.index = index;
            }
            if (flags > 0) {
                this.flags = flags;
                // The PREVIOUS flag overrides any index assigned just above.
                if (Flags.contains(flags, Flags.PREVIOUS))
                    this.index = -1;
            }
            if (width > 0) {
                this.width = width;
            }
            // Unlike index/flags/width, precision is copied unconditionally.
            // NOTE(review): the caller presumably passes the "no precision"
            // sentinel itself when no precision was parsed -- confirm.
            this.precision = precision;
            if (t != '\0') {
                dt = true;
                if (t == 'T') {
                    // Reads the 'flags' parameter, not this.flags; the two are
                    // equal whenever flags > 0 was stored above.
                    // NOTE(review): for flags <= 0 this relies on the parameter
                    // matching the field's initial value -- confirm.
                    this.flags = Flags.add(flags, Flags.UPPERCASE);
                }
            }
            // Both helpers are defined elsewhere in the enclosing class.
            conversion(conversion);
            check();
        }


With this change, FormatSpecifier will no longer contain any parsing logic; all parsing will be implemented in FormatSpecifierParser.

-------------

PR Comment: https://git.openjdk.org/jdk/pull/15776#issuecomment-1739269749


More information about the core-libs-dev mailing list