Fixed empty meaning literal and matched last meaning first for uniqueness
This commit is contained in:
parent
f108053f73
commit
365505afc6
|
@ -123,8 +123,9 @@ public class KanjiDict {
|
||||||
mean = mean.split(",")[0];
|
mean = mean.split(",")[0];
|
||||||
mean = mean.split("\'")[0];
|
mean = mean.split("\'")[0];
|
||||||
mean = mean.split("&")[0];
|
mean = mean.split("&")[0];
|
||||||
|
mean = mean.split("/")[0];
|
||||||
mean = mean.trim();
|
mean = mean.trim();
|
||||||
for (String rm : new String[] {"*",".", "/"}) {
|
for (String rm : new String[] {"*","."}) {
|
||||||
if (mean.contains(rm)) {
|
if (mean.contains(rm)) {
|
||||||
mean = "";
|
mean = "";
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,9 @@ package org.x4o.fc18.cake2.fcdoc.kanji;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@ -59,33 +61,44 @@ public class KanjiDictTest {
|
||||||
int meanEmpty = 0;
|
int meanEmpty = 0;
|
||||||
int meanFail = 0;
|
int meanFail = 0;
|
||||||
int meanUse2 = 0;
|
int meanUse2 = 0;
|
||||||
Set<String> uniqMean = new HashSet<>();
|
int meanUse1 = 0;
|
||||||
|
Map<String, KanjiDictLiteral> uniqMean = new HashMap<>();
|
||||||
for (Integer key : dict.mapFourCorner.keySet()) {
|
for (Integer key : dict.mapFourCorner.keySet()) {
|
||||||
KanjiDictLiteral lit = dict.mapFourCorner.get(key);
|
KanjiDictLiteral lit = dict.mapFourCorner.get(key);
|
||||||
if (lit.meaning.isEmpty()) {
|
if (lit.meaning.isEmpty()) {
|
||||||
meanEmpty++;
|
meanEmpty++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
String meanFirst = lit.meaning.get(0);
|
String meanLast = lit.meaning.get(lit.meaning.size() - 1);
|
||||||
if (uniqMean.contains(meanFirst)) {
|
if (!uniqMean.containsKey(meanLast)) {
|
||||||
if (lit.meaning.size() > 1) {
|
uniqMean.put(meanLast, lit);
|
||||||
String meanSecond = lit.meaning.get(1);
|
continue;
|
||||||
if (uniqMean.contains(meanSecond)) {
|
|
||||||
meanFail++;
|
|
||||||
} else {
|
|
||||||
meanUse2++;
|
|
||||||
uniqMean.add(meanSecond);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
meanFail++;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
uniqMean.add(meanFirst);
|
|
||||||
}
|
}
|
||||||
|
if (lit.meaning.size() > 1) {
|
||||||
|
String meanSecond = lit.meaning.get(1);
|
||||||
|
if (!uniqMean.containsKey(meanSecond)) {
|
||||||
|
uniqMean.put(meanSecond, lit);
|
||||||
|
meanUse2++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String meanFirst = lit.meaning.get(0);
|
||||||
|
if (!uniqMean.containsKey(meanFirst)) {
|
||||||
|
uniqMean.put(meanFirst, lit);
|
||||||
|
meanUse1++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
meanFail++;
|
||||||
}
|
}
|
||||||
|
// for (String litMean : uniqMean.keySet()) {
|
||||||
|
// KanjiDictLiteral lit = uniqMean.get(litMean);
|
||||||
|
// System.out.print(new StringBuilder().appendCodePoint(lit.codePoint).toString());
|
||||||
|
// System.out.println(" = " + litMean);
|
||||||
|
// }
|
||||||
|
|
||||||
// JIS X 0208 prescribes a set of 6879 graphical characters
|
// JIS X 0208 prescribes a set of 6879 graphical characters
|
||||||
// JIS X 0213 + 0208 have total characters 11233
|
// JIS X 0213 + 0208 have total characters 11233
|
||||||
|
// Japanese engineers have defined
|
||||||
System.out.println("=======================================");
|
System.out.println("=======================================");
|
||||||
System.out.println("mapFourCorner: " + dict.mapFourCorner.size());
|
System.out.println("mapFourCorner: " + dict.mapFourCorner.size());
|
||||||
System.out.println("mapKuTen208: " + dict.mapKuTen208.size());
|
System.out.println("mapKuTen208: " + dict.mapKuTen208.size());
|
||||||
|
@ -95,15 +108,17 @@ public class KanjiDictTest {
|
||||||
System.out.println("meanEmpty: " + meanEmpty);
|
System.out.println("meanEmpty: " + meanEmpty);
|
||||||
System.out.println("meanFail: " + meanFail);
|
System.out.println("meanFail: " + meanFail);
|
||||||
System.out.println("meanUse2: " + meanUse2);
|
System.out.println("meanUse2: " + meanUse2);
|
||||||
|
System.out.println("meanUse1: " + meanUse1);
|
||||||
// 15642334 bytes or 538402 lines gives;
|
// 15642334 bytes or 538402 lines gives;
|
||||||
// =======================================
|
// =======================================
|
||||||
// mapFourCorner: 3936
|
// mapFourCorner: 3936
|
||||||
// mapKuTen208: 6355
|
// mapKuTen208: 6355
|
||||||
// mapKuTen213: 3695
|
// mapKuTen213: 3695
|
||||||
// =======================================
|
// =======================================
|
||||||
// meanUniq: 3332
|
// meanUniq: 3440
|
||||||
// meanEmpty: 1
|
// meanEmpty: 0
|
||||||
// meanFail: 603
|
// meanFail: 496
|
||||||
// meanUse2: 405
|
// meanUse2: 233
|
||||||
|
// meanUse1: 300
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue