diff --git a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDict.java b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDict.java index 2429074..7640aea 100644 --- a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDict.java +++ b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDict.java @@ -123,8 +123,9 @@ public class KanjiDict { mean = mean.split(",")[0]; mean = mean.split("\'")[0]; mean = mean.split("&")[0]; + mean = mean.split("/")[0]; mean = mean.trim(); - for (String rm : new String[] {"*",".", "/"}) { + for (String rm : new String[] {"*","."}) { if (mean.contains(rm)) { mean = ""; } diff --git a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDictTest.java b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDictTest.java index 4c089fb..18dd50a 100644 --- a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDictTest.java +++ b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/cake2/fcdoc/kanji/KanjiDictTest.java @@ -25,7 +25,9 @@ package org.x4o.fc18.cake2.fcdoc.kanji; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -59,33 +61,44 @@ public class KanjiDictTest { int meanEmpty = 0; int meanFail = 0; int meanUse2 = 0; - Set uniqMean = new HashSet<>(); + int meanUse1 = 0; + Map uniqMean = new HashMap<>(); for (Integer key : dict.mapFourCorner.keySet()) { KanjiDictLiteral lit = dict.mapFourCorner.get(key); if (lit.meaning.isEmpty()) { meanEmpty++; continue; } - String meanFirst = lit.meaning.get(0); - if (uniqMean.contains(meanFirst)) { - if (lit.meaning.size() > 1) { - String meanSecond = lit.meaning.get(1); - if (uniqMean.contains(meanSecond)) { - meanFail++; - } else { - meanUse2++; - uniqMean.add(meanSecond); - } - } else { - meanFail++; - } - } else { - uniqMean.add(meanFirst); + String meanLast = lit.meaning.get(lit.meaning.size() - 1); + if (!uniqMean.containsKey(meanLast)) { + uniqMean.put(meanLast, lit); + continue; } + if (lit.meaning.size() > 1) { + String meanSecond = lit.meaning.get(1); + if (!uniqMean.containsKey(meanSecond)) { + uniqMean.put(meanSecond, lit); + meanUse2++; + continue; + } + } + String meanFirst = lit.meaning.get(0); + if (!uniqMean.containsKey(meanFirst)) { + uniqMean.put(meanFirst, lit); + meanUse1++; + continue; + } + meanFail++; } +// for (String litMean : uniqMean.keySet()) { +// KanjiDictLiteral lit = uniqMean.get(litMean); +// System.out.print(new StringBuilder().appendCodePoint(lit.codePoint).toString()); +// System.out.println(" = " + litMean); +// } // JIS X 0208 prescribes a set of 6879 graphical characters // JIS X 0213 + 0208 have total characters 11233 + // Japanese engineers have defined System.out.println("======================================="); System.out.println("mapFourCorner: " + dict.mapFourCorner.size()); System.out.println("mapKuTen208: " + dict.mapKuTen208.size()); @@ -95,15 +108,17 @@ public class KanjiDictTest { System.out.println("meanEmpty: " + meanEmpty); System.out.println("meanFail: " + meanFail); System.out.println("meanUse2: " + meanUse2); + System.out.println("meanUse1: " + meanUse1); // 15642334 bytes or 538402 lines gives; // ======================================= // mapFourCorner: 3936 // mapKuTen208: 6355 // mapKuTen213: 3695 // ======================================= - // meanUniq: 3332 - // meanEmpty: 1 - // meanFail: 603 - // meanUse2: 405 + // meanUniq: 3440 + // meanEmpty: 0 + // meanFail: 496 + // meanUse2: 233 + // meanUse1: 300 } }