RFR: 8254073: Tokenizer improvements (revised)
Jim Laskey
jlaskey at openjdk.java.net
Tue Oct 6 14:07:15 UTC 2020
This is a full revision of https://github.com/openjdk/jdk/pull/435, which contained two off-by-one bugs and was
reverted.
This revision contains the changes of that pull request plus:
diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java
index 39d9eadcf3a..b8425ad1ecb 100644
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java
@@ -306,8 +306,9 @@ public class JavadocTokenizer extends JavaTokenizer {
*
* Thus, to find the source position of any position, p, in the comment
* string, find the index, i, of the pair whose string offset
- * ({@code map[i + SB_OFFSET] }) is closest to but not greater than p. Then,
- * {@code sourcePos(p) = map[i + POS_OFFSET] + (p - map[i + SB_OFFSET]) }.
+ * ({@code map[i * NOFFSETS + SB_OFFSET] }) is closest to but not greater
+ * than p. Then, {@code sourcePos(p) = map[i * NOFFSETS + POS_OFFSET] +
+ * (p - map[i * NOFFSETS + SB_OFFSET]) }.
*/
static class OffsetMap {
/**
@@ -426,7 +427,7 @@ public class JavadocTokenizer extends JavaTokenizer {
int start = 0;
int end = size / NOFFSETS;
- while (start < end - NOFFSETS) {
+ while (start < end - 1) {
// find an index midway between start and end
int index = (start + end) / 2;
int indexScaled = index * NOFFSETS;
diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
index 2472632dbcd..7584b79044b 100644
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
@@ -221,48 +221,49 @@ public class UnicodeReader {
private boolean unicodeEscape() {
// Start of unicode escape (past backslash.)
int start = position + width;
- int index;
+
+ // Default to backslash result, unless proven otherwise.
+ character = '\\';
+ width = 1;
// Skip multiple 'u'.
+ int index;
for (index = start; index < length; index++) {
if (buffer[index] != 'u') {
break;
}
}
- // Needs to be at least backslash-u.
- if (index != start) {
- // If enough characters available.
- if (index + 4 < length) {
- // Convert four hex digits to codepoint. If any digit is invalid then the
- // result is negative.
- int code = (Character.digit(buffer[index++], 16) << 12) |
- (Character.digit(buffer[index++], 16) << 8) |
- (Character.digit(buffer[index++], 16) << 4) |
- Character.digit(buffer[index++], 16);
-
- // If all digits are good.
- if (code >= 0) {
- width = index - position;
- character = (char)code;
-
- return true;
- }
- }
+ // Needs to have been at least one u.
+ if (index == start) {
+ return false;
+ }
- // Did not work out.
- log.error(position, Errors.IllegalUnicodeEsc);
- width = index - position;
+ int code = 0;
- // Return true so that the invalid unicode escape is skipped.
- return true;
+ for (int i = 0; i < 4; i++) {
+ int digit = Character.digit(buffer[index], 16);
+ code = code << 4 | digit;
+
+ if (code < 0) {
+ break;
+ }
+
+ index++;
}
- // Must be just a backslash.
- character = '\\';
- width = 1;
+ // Skip digits even if error.
+ width = index - position;
- return false;
+ // If all digits are good.
+ if (code >= 0) {
+ character = (char)code;
+ } else {
+ log.error(position, Errors.IllegalUnicodeEsc);
+ }
+
+ // Return true even if error so that the invalid unicode escape is skipped.
+ return true;
}
/**
@@ -549,7 +550,7 @@ public class UnicodeReader {
/**
* Offset from the beginning of the original reader buffer.
*/
- private int offset;
+ final private int offset;
/**
* Current column in the comment.
-------------
Commit messages:
- Merge branch 'master' into 8254073
- Merge branch 'master' into 8254073
- 8254073: Tokenizer improvements (revised)
Changes: https://git.openjdk.java.net/jdk/pull/525/files
Webrev: https://webrevs.openjdk.java.net/?repo=jdk&pr=525&range=00
Issue: https://bugs.openjdk.java.net/browse/JDK-8254073
Stats: 2349 lines in 18 files changed: 1115 ins; 597 del; 637 mod
Patch: https://git.openjdk.java.net/jdk/pull/525.diff
Fetch: git fetch https://git.openjdk.java.net/jdk pull/525/head:pull/525
PR: https://git.openjdk.java.net/jdk/pull/525
More information about the compiler-dev
mailing list