RFR: 8316704: Regex-free parsing of Formatter and FormatProcessor specifiers [v12]
Raffaello Giulietti
rgiulietti at openjdk.org
Wed Sep 27 19:54:16 UTC 2023
On Wed, 27 Sep 2023 19:13:02 GMT, 温绍锦 <duke at openjdk.org> wrote:
>> @cl4es made performance optimizations for the simple specifiers of String.format in PR https://github.com/openjdk/jdk/pull/2830. Based on the same idea, I continued to make improvements. I made patterns like %2d %02d also be optimized.
>>
>> The following are the test results based on MacBookPro M1 Pro:
>>
>>
>> -Benchmark Mode Cnt Score Error Units
>> -StringFormat.complexFormat avgt 15 1862.233 ? 217.479 ns/op
>> -StringFormat.int02Format avgt 15 312.491 ? 26.021 ns/op
>> -StringFormat.intFormat avgt 15 84.432 ? 4.145 ns/op
>> -StringFormat.longFormat avgt 15 87.330 ? 6.111 ns/op
>> -StringFormat.stringFormat avgt 15 63.985 ? 11.366 ns/op
>> -StringFormat.stringIntFormat avgt 15 87.422 ? 0.147 ns/op
>> -StringFormat.widthStringFormat avgt 15 250.740 ? 32.639 ns/op
>> -StringFormat.widthStringIntFormat avgt 15 312.474 ? 16.309 ns/op
>>
>> +Benchmark Mode Cnt Score Error Units
>> +StringFormat.complexFormat avgt 15 740.626 ? 66.671 ns/op (+151.45)
>> +StringFormat.int02Format avgt 15 131.049 ? 0.432 ns/op (+138.46)
>> +StringFormat.intFormat avgt 15 67.229 ? 4.155 ns/op (+25.59)
>> +StringFormat.longFormat avgt 15 66.444 ? 0.614 ns/op (+31.44)
>> +StringFormat.stringFormat avgt 15 62.619 ? 4.652 ns/op (+2.19)
>> +StringFormat.stringIntFormat avgt 15 89.606 ? 13.966 ns/op (-2.44)
>> +StringFormat.widthStringFormat avgt 15 52.462 ? 15.649 ns/op (+377.95)
>> +StringFormat.widthStringIntFormat avgt 15 101.814 ? 3.147 ns/op (+206.91)
>
> 温绍锦 has updated the pull request incrementally with one additional commit since the last revision:
>
> fix : the exception thrown when the input does not include conversion is different from baselne.
You might consider this alternative, which IMO is simpler and more readable.
int parse() {
// %[argument_index$][flags][width][.precision][t]conversion
// %(\d+$)?([-#+ 0,(<]*)?(\d+)?(.\d+)?([tT])?([a-zA-Z%])
parseArgument();
parseFlag();
parseWidth();
int precisionSize = parsePrecision();
if (precisionSize < 0) {
return 0;
}
// ([tT])?([a-zA-Z%])
char t = '\0', conversion = '\0';
if ((c == 't' || c == 'T') && off + 1 < max) {
char c1 = s.charAt(off + 1);
if (isConversion(c1)) {
t = c;
conversion = c1;
off += 2;
}
}
if (conversion == '\0' && isConversion(c)) {
conversion = c;
++off;
}
if (argSize + flagSize + widthSize + precisionSize + t + conversion != 0) {
if (al != null) {
FormatSpecifier formatSpecifier
= new FormatSpecifier(s, start, argSize, flagSize, widthSize, precisionSize, t, conversion);
al.add(formatSpecifier);
}
return off - start;
}
return 0;
}
private void parseArgument() {
// (\d+$)?
int i = off;
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
if (i == max || c != '$') {
c = first;
return;
}
++i; // skip '$'
if (i < max) {
c = s.charAt(i);
}
argSize = i - off;
off = i;
}
private void parseFlag() {
// ([-#+ 0,(<]*)?
int i = off;
for (; i < max && Flags.isFlag(c = s.charAt(i)); ++i); // empty body
flagSize = i - off;
off = i;
}
private void parseWidth() {
// (\d+)?
int i = off;
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
widthSize = i - off;
off = i;
}
private int parsePrecision() {
// (.\d+)?
if (c != '.') {
return 0;
}
int i = off + 1;
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
if (i == off + 1) { // a '.' but no digits
return -1;
}
int size = i - off;
off = i;
return size;
}
-------------
PR Comment: https://git.openjdk.org/jdk/pull/15776#issuecomment-1737986458
More information about the core-libs-dev
mailing list