RFR: 8309622: Re-examine the cache mechanism in BaseLocale [v3]
Brett Okken
duke at openjdk.org
Fri Jun 16 23:07:09 UTC 2023
On Fri, 16 Jun 2023 17:18:33 GMT, Naoto Sato <naoto at openjdk.org> wrote:
>> src/java.base/share/classes/sun/util/locale/BaseLocale.java line 168:
>>
>>> 166: // BaseLocale as the key. The returned "normalized" instance
>>> 167: // can subsequently be used by the Locale instance which
>>> 168: // guarantees the locale components are properly cased/interned.
>>
>> Is it truly important that the String values are interned, or is the desire simply that all refer to the same "canonical" String instance?
>> my thought is that managing the canonical String instances could avoid WeakReferences and synchronized map access and just return a new BaseLocale each time.
>>
>>
>>
>>
>> public static BaseLocale getInstance(String language, String script,
>> String region, String variant) {
>> if (script == null || script.isEmpty()) {
>> script = "";
>> } else {
>> script = getCanonicalString(script, LocaleUtils::toTitleString);
>> }
>>
>> if (region == null || region.isEmpty()) {
>> region = "";
>> } else {
>> region = getCanonicalString(region, LocaleUtils::toUpperString);
>> }
>>
>> if (language == null || language.isEmpty()) {
>> language = "";
>> } else {
>> language = getCanonicalString(language, LocaleUtils::toLowerString);
>> }
>>
>> if (variant == null || variant.isEmpty()) {
>> variant = "";
>> } else {
>> variant = getCanonicalString(variant, Function.identity());
>> }
>> ...
>> return new BaseLocale(language, script, region, variant);
>> }
>>
>>
>>
>> private static final ConcurrentMap<String, String> CANONICALIZED_STRINGS = new ConcurrentHashMap<>();
>>
>> static {
>> // prime the old iso codes
>> CANONICALIZED_STRINGS.put("he", "he");
>> CANONICALIZED_STRINGS.put("iw", "iw");
>> CANONICALIZED_STRINGS.put("id", "id");
>> CANONICALIZED_STRINGS.put("in", "in");
>> CANONICALIZED_STRINGS.put("ji", "ji");
>> CANONICALIZED_STRINGS.put("yi", "yi");
>> }
>>
>> private static String getCanonicalString(String value, Function<String, String> conversion) {
>> String existing = CANONICALIZED_STRINGS.get(value);
>> if (existing != null) {
>> return existing;
>> }
>>
>> String converted = conversion.apply(value);
>> return CANONICALIZED_STRINGS.merge(converted, converted, (k,v) -> v);...
>
> Interning components in a Locale instance (through BaseLocale) is a long standing behavior. Changing it could break apps that might have relied on it (using `==` for comparison)
as in doing `"en" == locale.getLanguage()`?
In that case, the interned values could still be used as the canonical values, and the CHM can account both for case adjustments and interning (and likely faster than actually calling intern). I am curious how this does with your jmh test:
private static final ConcurrentMap<String, String> CANONICALIZED_UPPER_STRINGS = new ConcurrentHashMap<>();
private static final ConcurrentMap<String, String> CANONICALIZED_LOWER_STRINGS = new ConcurrentHashMap<>();
private static final ConcurrentMap<String, String> CANONICALIZED_TITLE_STRINGS = new ConcurrentHashMap<>();
private static final ConcurrentMap<String, String> CANONICALIZED_STRINGS = new ConcurrentHashMap<>();
public static BaseLocale getInstance(String language, String script,
String region, String variant) {
if (script == null || script.isEmpty()) {
script = "";
} else {
script = getCanonicalString(script, LocaleUtils::toTitleString, CANONICALIZED_TITLE_STRINGS);
}
if (region == null || region.isEmpty()) {
region = "";
} else {
region = getCanonicalString(region, LocaleUtils::toUpperString, CANONICALIZED_UPPER_STRINGS);
}
if (language == null || language.isEmpty()) {
language = "";
} else {
language = getCanonicalString(language, LocaleUtils::toLowerString, CANONICALIZED_LOWER_STRINGS);
}
if (variant == null || variant.isEmpty()) {
variant = "";
} else {
variant = getCanonicalString(variant, Function.identity(), CANONICALIZED_STRINGS);
}
...
return new BaseLocale(language, script, region, variant);
}
private static String getCanonicalString(String value, Function<String, String> conversion, ConcurrentMap<String, String> map) {
String existing = map.get(value);
if (existing != null) {
return existing;
}
String converted = conversion.apply(value).intern();
// put the interned value in, keyed by the interned value
map.putIfAbsent(converted, converted);
// also put the interned value in for the given key, if not already present
// this avoids future needs to apply a conversion to the value
map.putIfAbsent(value, converted);
return converted;
}
-------------
PR Review Comment: https://git.openjdk.org/jdk/pull/14404#discussion_r1232867251
More information about the core-libs-dev
mailing list