Skip to content

Commit

Permalink
museumsvictoria#157: treat 0xa0 NO-BREAK-SPACE; and tidy-up
Browse files: browse the repository at this point in the history
  • Loading branch information
justparking authored and scroix committed Apr 1, 2024
1 parent 85723e1 commit b8916be
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 17 deletions.
12 changes: 6 additions & 6 deletions nodel-framework/src/main/java/org/nodel/SimpleName.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,13 +373,13 @@ public static String flatten(String name, char[] passthrough) {
sb.append(Character.toLowerCase(c));
}

// spaces and most ASCII control codes
// skip spaces and most ASCII control codes
else if (c <= 32) {
continue;
}

// else ASCII letters or digits and everything else (extended ASCII and Unicode)
else if (c > 127 || Character.isLetterOrDigit(c)) {
// else include letters or digits, plus any other non-7-bit-ASCII character that is not a space
else if (Character.isLetterOrDigit(c) || (c > 127 && !Character.isSpaceChar(c))) {
String flatDialect = s_diacritic_tolowerascii.get(c);
if (flatDialect != null)
sb.append(flatDialect);
Expand Down Expand Up @@ -413,16 +413,16 @@ public static void flattenChar(char c, StringBuilder sb) {
if (c <= 32)
return;

// include ASCII letters or digits and everything else
if (c > 127 || Character.isLetterOrDigit(c)) {
// include letters or digits and everything else non-7-bit-ASCII that is not a space
if (Character.isLetterOrDigit(c) || (c > 127 && !Character.isSpaceChar(c))) {
String flatDialect = s_diacritic_tolowerascii.get(c);
if (flatDialect != null)
sb.append(flatDialect);
else
sb.append(Character.toLowerCase(c));
}

// ...skip the remaining ASCII symbols
// ...skip remaining
}

} // (class)
4 changes: 2 additions & 2 deletions nodel-framework/src/main/java/org/nodel/Strings.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ public static boolean isBlank(String value) {
if (len == 0)
return true;

// ...and then for any non common-whitespace
// ...and then for any non common-whitespace, incl. 0xa0 NO-BREAK SPACE
for (int a = 0; a < len; a++) {
char c = value.charAt(a);

if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
if (c != ' ' && c != '\t' && c != '\r' && c != '\n' && !Character.isSpaceChar(c))
return false;
}

Expand Down
15 changes: 6 additions & 9 deletions nodel-framework/src/main/java/org/nodel/core/Nodel.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@
import org.nodel.discovery.AutoDNS;
import org.nodel.io.UnexpectedIOException;

// TODO: use one of these to implement character normalisation:
// http://stackoverflow.com/questions/1008802/converting-symbols-accent-letters-to-english-alphabet

public class Nodel {

private final static String VERSION = "2.2.1";
Expand Down Expand Up @@ -112,9 +109,9 @@ else if (removeCommentsOnly)
else if (Character.isLetterOrDigit(c))
sb.append(c);

else if (c > 127)
else if (c > 127 && !Character.isSpaceChar(c))
// every other extended ASCII and Unicode character
// is significant
// is significant (except space characters e.g. \u00A0 NO-BREAK SPACE)
sb.append(c);

// store last char for comments
Expand Down Expand Up @@ -474,19 +471,19 @@ private static String formatAgent() {
sb.append("nodel/").append(VERSION);

String javaRuntime = System.getProperty("java.runtime.version");
if (!Strings.isNullOrEmpty(javaRuntime))
if (!Strings.isBlank(javaRuntime))
sb.append(" java/").append(javaRuntime.replace(' ', '_'));

String vendor = System.getProperty("java.vm.vendor");
if (!Strings.isNullOrEmpty(vendor))
if (!Strings.isBlank(vendor))
sb.append(' ').append(vendor.replace(' ', '_'));

String os = System.getProperty("os.name");
if (!Strings.isNullOrEmpty(os))
if (!Strings.isBlank(os))
sb.append(' ').append(os.replace(' ', '_'));

String arch = System.getProperty("sun.arch.data.model");
if (!Strings.isNullOrEmpty(arch))
if (!Strings.isBlank(arch))
sb.append(" arch").append(arch.replace(' ', '_'));

return sb.toString();
Expand Down

0 comments on commit b8916be

Please sign in to comment.