-
-
Notifications
You must be signed in to change notification settings - Fork 43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bump JSP to Unicode 14 #93
Changes from all commits
816672f
ebe22c7
a3cf705
9264c9a
c11afd2
3c5e85b
c9461c5
933d2b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,7 +31,7 @@ | |
import com.ibm.icu.util.VersionInfo; | ||
|
||
public class CachedProps { | ||
public static final boolean IS_BETA = false; | ||
public static final boolean IS_BETA = true; | ||
|
||
public static final Splitter HASH_SPLITTER = Splitter.on('#').trimResults(); | ||
public static final Splitter SEMI_SPLITTER = Splitter.on(';').trimResults(); | ||
|
@@ -44,7 +44,7 @@ public class CachedProps { | |
final BiMultimap<String,String> nameToAliases = new BiMultimap<String,String>(null,null); | ||
final Map<String,BiMultimap<String,String>> nameToValueToAliases = new LinkedHashMap(); | ||
|
||
static CachedProps CACHED_PROPS = getInstance(VersionInfo.getInstance(12)); | ||
static CachedProps CACHED_PROPS = getInstance(VersionInfo.getInstance(14)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be driven by the version string in class Settings? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a bit more complicated than that because of the interplay with the beta flag. I suggested that we go with 14 for now, and file an issue. We don't want to wait on the fuller solution. |
||
|
||
static UnicodeProperty NAMES = CachedProps.CACHED_PROPS.getProperty("Name"); | ||
|
||
|
@@ -144,8 +144,8 @@ class DelayedUnicodeProperty extends UnicodeProperty { | |
private List<String> nameAliases; | ||
private Multimap<String,String> valueToAliases; | ||
|
||
public DelayedUnicodeProperty(VersionInfo version, String propName, | ||
Collection<String> nameAliases, | ||
public DelayedUnicodeProperty(VersionInfo version, String propName, | ||
Collection<String> nameAliases, | ||
BiMultimap<String, String> biMultimap) { | ||
this.version = version; | ||
Collection<String> temp; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,9 @@ | |
import java.util.Set; | ||
import java.util.TreeMap; | ||
import java.util.TreeSet; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
import java.util.logging.Logger; | ||
import java.util.regex.Pattern; | ||
|
||
import com.ibm.icu.dev.util.CollectionUtilities; | ||
|
@@ -24,25 +27,70 @@ | |
* @author markdavis | ||
*/ | ||
public class ScriptTester { | ||
static Logger logger = Logger.getLogger(ScriptTester.class.getName()); | ||
private final UnicodeMap<BitSet> character_compatibleScripts; | ||
|
||
|
||
public enum CompatibilityLevel {Highly_Restrictive, Moderately_Restrictive} | ||
public enum ScriptSpecials {on, off} | ||
|
||
|
||
/** | ||
* Space reserved for script codes not in ICU | ||
*/ | ||
public static final int EXTRA_COUNT = 16; // should be enough, hard working as UTC is! | ||
public static final Map<String,Integer> extraScripts = new ConcurrentHashMap<>(EXTRA_COUNT); | ||
/** | ||
* Extended scripts; note that they do not have stable numbers, and should not be persisted. | ||
*/ | ||
public static final int | ||
public static final int | ||
//HANT = UScript.CODE_LIMIT, | ||
//HANS = HANT + 1, | ||
LIMIT = UScript.CODE_LIMIT; // HANS + 1; | ||
|
||
private static String[][] EXTENDED_NAME = {{"Hant", "Han Traditional"}, {"Hans", "Han Simplified"}}; | ||
LIMIT = UScript.CODE_LIMIT + EXTRA_COUNT; // HANS + 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please remove the comments about HANT/HANS, since we have real UScript constants for them. |
||
|
||
private static String[][] EXTENDED_NAME = { | ||
// Scripts without stable numbers | ||
{"Hant", "Han Traditional"}, {"Hans", "Han Simplified"}, | ||
}; | ||
Comment on lines
+52
to
+54
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need these? ICU has UScript.SIMPLIFIED_HAN and UScript.TRADITIONAL_HAN. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure. |
||
|
||
static AtomicInteger scriptCounter = new AtomicInteger(UScript.CODE_LIMIT); | ||
|
||
static int getScriptCode(String script) { | ||
try { | ||
// If ICU has it, great | ||
return UCharacter.getPropertyValueEnum(UProperty.SCRIPT, script); | ||
} catch (com.ibm.icu.impl.IllegalIcuArgumentException iiae) { | ||
// Make something up | ||
int newCode = extraScripts.computeIfAbsent(script, script2 -> { | ||
int i = scriptCounter.getAndIncrement(); | ||
logger.warning("Synthesized scriptCode " + i + " for unrecognized script extension '"+script+"'"); | ||
return i; | ||
}); | ||
// Verify we didn't run over | ||
if (newCode >= LIMIT) { | ||
Comment on lines
+69
to
+70
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need a hard limit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we have more than 'extrascripts' scripts, yes |
||
throw new RuntimeException("computed script code of " + newCode + " for '"+script+"' overflows: have " + extraScripts.size() + | ||
" scripts but EXTRA_COUNT=" + EXTRA_COUNT); | ||
} | ||
return newCode; | ||
} | ||
} | ||
|
||
public static String getScriptName(int extendedScriptCode, int choice) { | ||
if (extendedScriptCode >= UScript.CODE_LIMIT) { | ||
return EXTENDED_NAME[extendedScriptCode - UScript.CODE_LIMIT][choice]; | ||
if (extendedScriptCode >= LIMIT) { | ||
return EXTENDED_NAME[extendedScriptCode - LIMIT][choice]; | ||
} else { | ||
for (Map.Entry<String, Integer> e : extraScripts.entrySet()) { | ||
if(e.getValue() == extendedScriptCode) { | ||
if(choice == 0) { | ||
return e.getKey(); | ||
} else { | ||
return "New Script '"+ e.getKey() + "'"; | ||
} | ||
} | ||
} | ||
throw new IllegalArgumentException("Unknown extended script code " + extendedScriptCode); | ||
} | ||
} | ||
return UCharacter.getPropertyValueName(UProperty.SCRIPT, extendedScriptCode, choice); | ||
} | ||
|
@@ -128,12 +176,12 @@ public boolean isOk(CharSequence input) { | |
// check numbers | ||
return true; | ||
} | ||
|
||
|
||
|
||
// TODO, cache results | ||
private BitSet getActualScripts(int cp) { | ||
BitSet actualScripts = scriptSpecials.get(cp); | ||
BitSet actualScripts = getScriptSpecials().get(cp); | ||
if (actualScripts == null) { | ||
actualScripts = new BitSet(LIMIT); | ||
int script = UCharacter.getIntPropertyValue(cp, UProperty.SCRIPT); | ||
|
@@ -143,7 +191,7 @@ private BitSet getActualScripts(int cp) { | |
} | ||
|
||
public boolean filterTable(List<Set<String>> table) { | ||
|
||
// We make one pass forward and one backward, finding if each characters scripts | ||
// are compatible with the ones before. | ||
// We then make a second pass for the ones after. | ||
|
@@ -248,7 +296,7 @@ private boolean contains(BitSet set1, BitSet set2) { | |
} | ||
|
||
public static class ScriptExtensions { | ||
|
||
public static final Comparator<BitSet> COMPARATOR = new Comparator<BitSet>() { | ||
|
||
public int compare(BitSet o1, BitSet o2) { | ||
|
@@ -260,13 +308,13 @@ public int compare(BitSet o1, BitSet o2) { | |
return n1.compareToIgnoreCase(n2); | ||
} | ||
}; | ||
|
||
private UnicodeMap<BitSet> scriptSpecials; | ||
|
||
public Collection<BitSet> getAvailableValues() { | ||
return scriptSpecials.getAvailableValues(); | ||
} | ||
|
||
public UnicodeSet getSet(BitSet value) { | ||
return scriptSpecials.getSet(value); | ||
} | ||
|
@@ -279,21 +327,21 @@ private static class MyHandler extends FileUtilities.SemiFileReader { | |
public boolean handleLine(int start, int end, String[] items) { | ||
BitSet bitSet = new BitSet(LIMIT); | ||
for (String script : SPACES.split(items[1])) { | ||
int scriptCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, script); | ||
int scriptCode = getScriptCode(script); | ||
bitSet.set(scriptCode); | ||
} | ||
map.putAll(start, end, bitSet); | ||
return true; | ||
} | ||
} | ||
|
||
public static ScriptExtensions make(String directory, String filename) { | ||
ScriptExtensions result = new ScriptExtensions(); | ||
result.scriptSpecials = ((MyHandler) new MyHandler() | ||
.process(directory, filename)).map.freeze(); | ||
return result; | ||
} | ||
|
||
public static ScriptExtensions make(Class aClass, String filename) { | ||
ScriptExtensions result = new ScriptExtensions(); | ||
result.scriptSpecials = ((MyHandler) new MyHandler() | ||
|
@@ -312,7 +360,7 @@ public void putAllInto(UnicodeMap<BitSet> char2scripts) { | |
public static String getNames(BitSet value, int choice, String separator) { | ||
return getNames(value, choice, separator, new TreeSet<String>()); | ||
} | ||
|
||
public static String getNames(BitSet value, int choice, String separator, Set<String> names) { | ||
names.clear(); | ||
for (int i = value.nextSetBit(0); i >= 0; i = value.nextSetBit(i+1)) { | ||
|
@@ -321,12 +369,24 @@ public static String getNames(BitSet value, int choice, String separator, Set<St | |
return CollectionUtilities.join(names, separator).toString(); | ||
} | ||
} | ||
|
||
static ScriptExtensions scriptSpecials = ScriptExtensions.make(ScriptExtensions.class, "ScriptExtensions.txt"); | ||
|
||
static final class ScriptExtensionsHelper { | ||
ScriptExtensions scriptSpecials; | ||
|
||
ScriptExtensionsHelper() { | ||
scriptSpecials = ScriptExtensions.make(ScriptExtensions.class, "ScriptExtensions.txt"); | ||
} | ||
|
||
static ScriptExtensionsHelper INSTANCE = new ScriptExtensionsHelper(); | ||
} | ||
|
||
static final ScriptExtensions getScriptSpecials() { | ||
return ScriptExtensionsHelper.INSTANCE.scriptSpecials; | ||
} | ||
|
||
public static BitSet getScriptSpecials(int codepoint) { | ||
BitSet output = new BitSet(LIMIT); | ||
BitSet actualScripts = scriptSpecials.get(codepoint); | ||
BitSet actualScripts = getScriptSpecials().get(codepoint); | ||
if (actualScripts != null) { | ||
output.or(actualScripts); | ||
} else { | ||
|
@@ -340,14 +400,14 @@ public static UnicodeMap<String> getScriptSpecialsNames() { | |
UnicodeMap<String> result = new UnicodeMap<String>(); | ||
Set<String> names = new TreeSet<String>(); // to alphabetize | ||
|
||
for (BitSet value : scriptSpecials.getAvailableValues()) { | ||
result.putAll(scriptSpecials.getSet(value), ScriptExtensions.getNames(value, UProperty.NameChoice.LONG, ",", names)); | ||
for (BitSet value : getScriptSpecials().getAvailableValues()) { | ||
result.putAll(getScriptSpecials().getSet(value), ScriptExtensions.getNames(value, UProperty.NameChoice.LONG, ",", names)); | ||
} | ||
return result; | ||
} | ||
|
||
public static String[][] getScriptSpecialsAlternates() { | ||
Collection<BitSet> availableValues = scriptSpecials.getAvailableValues(); | ||
Collection<BitSet> availableValues = getScriptSpecials().getAvailableValues(); | ||
String[][] result = new String[availableValues.size()][]; | ||
Set<String> names = new TreeSet<String>(); // to alphabetize | ||
|
||
|
@@ -387,7 +447,7 @@ private Builder(CompatibilityLevel level, ScriptSpecials specials) { | |
addCompatible(UScript.LATIN, i); | ||
} | ||
// FALL THRU! | ||
case Highly_Restrictive: | ||
case Highly_Restrictive: | ||
addCompatible(UScript.LATIN, UScript.HAN, UScript.HIRAGANA, UScript.KATAKANA); | ||
//addCompatible(UScript.LATIN, HANT, UScript.HIRAGANA, UScript.KATAKANA); | ||
//addCompatible(UScript.LATIN, HANS, UScript.HIRAGANA, UScript.KATAKANA); | ||
|
@@ -413,7 +473,7 @@ private Builder(CompatibilityLevel level, ScriptSpecials specials) { | |
// fix the char2scripts mapping | ||
|
||
if (specials == ScriptSpecials.on){ | ||
scriptSpecials.putAllInto(char2scripts); | ||
getScriptSpecials().putAllInto(char2scripts); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm surprised at this; if based on master it would be replacing 13 by 14.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this a bug? I hadn't changed this value. Should it be calculated?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lemme check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that should be the version of the beta props. I think it is built that way so that it doesn't pull in the BIN properties if BETA is off. For now, let's just leave it at 14, but file an issue.