Added reading of Spanish words from kanji data

This commit is contained in:
Willem Cazander 2025-01-24 23:54:53 +01:00
parent 12785fff61
commit ca80b2e2ae
3 changed files with 108 additions and 48 deletions

View file

@ -72,14 +72,14 @@ public class KanjiDict {
String mLang; String mLang;
private void addLiteral() { private void addLiteral() {
if (literal.kuTen208 != null) { if (literal.getKuTen208() != null) {
mapKuTen208.put(literal.kuTen208, literal); mapKuTen208.put(literal.getKuTen208(), literal);
} }
if (literal.kuTen213 != null) { if (literal.getKuTen213() != null) {
mapKuTen213.put(literal.kuTen213, literal); mapKuTen213.put(literal.getKuTen213(), literal);
} }
if (literal.fourCorner != null) { if (literal.getFourCorner() != null) {
mapFourCorner.put(literal.fourCorner, literal); mapFourCorner.put(literal.getFourCorner(), literal);
} }
literal = null; literal = null;
} }
@ -99,18 +99,17 @@ public class KanjiDict {
addLiteral(); addLiteral();
} else if ("cp_value".equals(qName)) { } else if ("cp_value".equals(qName)) {
if ("ucs".equals(cpType)) { if ("ucs".equals(cpType)) {
literal.codePoint = Integer.parseInt(bufChar.toString(), 16); literal.setCodePoint(Integer.parseInt(bufChar.toString(), 16));
} else if ("jis208".equals(cpType)) { } else if ("jis208".equals(cpType)) {
literal.kuTen208 = bufChar.toString(); literal.setKuTen208(bufChar.toString());
} else if ("jis213".equals(cpType)) { } else if ("jis213".equals(cpType)) {
literal.kuTen213 = bufChar.toString(); literal.setKuTen213(bufChar.toString());
} }
} else if ("q_code".equals(qName)) { } else if ("q_code".equals(qName)) {
if ("four_corner".equals(qcType)) { if ("four_corner".equals(qcType)) {
literal.fourCorner = Integer.parseInt(bufChar.toString().replaceAll("\\.", "")); literal.setFourCorner(Integer.parseInt(bufChar.toString().replaceAll("\\.", "")));
} }
} else if ("meaning".equals(qName)) { } else if ("meaning".equals(qName)) {
if (mLang == null) {
String mean = bufChar.toString(); String mean = bufChar.toString();
if (mean.startsWith("(")) { if (mean.startsWith("(")) {
mean = mean.replaceAll("\\(", ""); mean = mean.replaceAll("\\(", "");
@ -131,8 +130,7 @@ public class KanjiDict {
} }
} }
if (!mean.isEmpty()) { if (!mean.isEmpty()) {
literal.meaning.add(mean); literal.addMeaning(mLang ,mean);
}
} }
mLang = null; mLang = null;
} }

View file

@ -23,7 +23,9 @@
package org.x4o.fc18.cake2.fcdoc.kanji; package org.x4o.fc18.cake2.fcdoc.kanji;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
/** /**
* Kanji KuTen and FourCorner and codePoint and meaning of a literal. * Kanji KuTen and FourCorner and codePoint and meaning of a literal.
@ -33,9 +35,62 @@ import java.util.List;
*/ */
public class KanjiDictLiteral { public class KanjiDictLiteral {
String kuTen208; private String kuTen208;
String kuTen213; private String kuTen213;
Integer fourCorner; private Integer fourCorner;
Integer codePoint; private Integer codePoint;
final List<String> meaning = new ArrayList<>(); private final Map<String, List<String>> meaning = new HashMap<>();
/**
 * Creates an empty literal: no field is assigned here, so every getter
 * returns {@code null} until its setter is called, and no meanings exist.
 */
public KanjiDictLiteral() {
}
/**
 * @return the kuTen208 value, or {@code null} if none has been set
 */
public String getKuTen208() {
return kuTen208;
}
/**
 * @param kuTen208 the kuTen208 value to store (stored as given, no validation)
 */
public void setKuTen208(String kuTen208) {
this.kuTen208 = kuTen208;
}
/**
 * @return the kuTen213 value, or {@code null} if none has been set
 */
public String getKuTen213() {
return kuTen213;
}
/**
 * @param kuTen213 the kuTen213 value to store (stored as given, no validation)
 */
public void setKuTen213(String kuTen213) {
this.kuTen213 = kuTen213;
}
/**
 * @return the four corner value, or {@code null} if none has been set
 */
public Integer getFourCorner() {
return fourCorner;
}
/**
 * @param fourCorner the four corner value to store (stored as given, no validation)
 */
public void setFourCorner(Integer fourCorner) {
this.fourCorner = fourCorner;
}
/**
 * @return the code point, or {@code null} if none has been set
 */
public Integer getCodePoint() {
return codePoint;
}
/**
 * @param codePoint the code point to store (stored as given, no validation)
 */
public void setCodePoint(Integer codePoint) {
this.codePoint = codePoint;
}
/**
 * Returns all meanings grouped by locale key.
 *
 * This is the internal map itself, not a copy, so changes made through the
 * returned reference are visible to this literal. Meanings added with a
 * {@code null} locale are stored under the empty string key.
 *
 * @return the live locale-to-meanings map, never {@code null}
 */
public Map<String, List<String>> getMeaning() {
return meaning;
}
/**
 * Returns the meanings that were added without a locale, i.e. the entries
 * stored under the empty string key.
 *
 * A literal that never received such a meaning has no entry for that key;
 * an immutable empty list is returned in that case so callers can use
 * {@code size()}, {@code isEmpty()} or {@code stream()} without a null check.
 *
 * @return the live list of default meanings, or an immutable empty list when
 *         there are none; never {@code null}
 */
public List<String> getMeaningDefault() {
    List<String> defaults = meaning.get("");
    if (defaults == null) {
        // Fully qualified so that no new file-level import is required.
        return java.util.Collections.emptyList();
    }
    return defaults;
}
/**
 * Appends one meaning text to the list kept for the given locale, creating
 * that list on first use.
 *
 * @param locale the locale key; {@code null} is mapped to the empty string key
 * @param text the meaning text to append
 */
public void addMeaning(String locale, String text) {
    final String bucket = (locale != null) ? locale : "";
    meaning.computeIfAbsent(bucket, unused -> new ArrayList<>()).add(text);
}
} }

View file

@ -26,9 +26,8 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
/** /**
@ -52,10 +51,18 @@ public class KanjiDictTest {
System.out.println("======================================="); System.out.println("=======================================");
for (Integer key : dict.mapFourCorner.keySet()) { for (Integer key : dict.mapFourCorner.keySet()) {
KanjiDictLiteral lit = dict.mapFourCorner.get(key); KanjiDictLiteral lit = dict.mapFourCorner.get(key);
int meanCnt = lit.meaning.size(); int meanCnt = lit.getMeaningDefault().size();
String meaning = lit.meaning.stream().map(v -> v.replaceAll(" ", "_").toUpperCase()).collect(Collectors.joining(" ")); String meaning = lit.getMeaningDefault().stream().map(v -> v.replaceAll(" ", "\u3000")).collect(Collectors.joining(" - "));
System.out.print(new StringBuilder().appendCodePoint(lit.codePoint).toString()); System.out.print(new StringBuilder().appendCodePoint(lit.getCodePoint()).toString());
System.out.println(String.format(" %s FC %5d = %2d %s", lit.kuTen208, lit.fourCorner, meanCnt, meaning)); System.out.println(String.format(" %s FC %5d = %2d %s", lit.getKuTen208(), lit.getFourCorner(), meanCnt, meaning));
for (String locale : lit.getMeaning().keySet()) {
if (!"es".equals(locale)) {
continue;
}
List<String> texts = lit.getMeaning().get(locale);
String text = texts.stream().map(v -> v.replaceAll(" ", "\u3000")).collect(Collectors.joining(" - "));
System.out.println("\t\tLOCALE: " + locale + " TEXT: " + text);
}
} }
int meanEmpty = 0; int meanEmpty = 0;
@ -65,24 +72,24 @@ public class KanjiDictTest {
Map<String, KanjiDictLiteral> uniqMean = new HashMap<>(); Map<String, KanjiDictLiteral> uniqMean = new HashMap<>();
for (Integer key : dict.mapFourCorner.keySet()) { for (Integer key : dict.mapFourCorner.keySet()) {
KanjiDictLiteral lit = dict.mapFourCorner.get(key); KanjiDictLiteral lit = dict.mapFourCorner.get(key);
if (lit.meaning.isEmpty()) { if (lit.getMeaningDefault().isEmpty()) {
meanEmpty++; meanEmpty++;
continue; continue;
} }
String meanLast = lit.meaning.get(lit.meaning.size() - 1); String meanLast = lit.getMeaningDefault().get(lit.getMeaningDefault().size() - 1);
if (!uniqMean.containsKey(meanLast)) { if (!uniqMean.containsKey(meanLast)) {
uniqMean.put(meanLast, lit); uniqMean.put(meanLast, lit);
continue; continue;
} }
if (lit.meaning.size() > 1) { if (lit.getMeaningDefault().size() > 1) {
String meanSecond = lit.meaning.get(1); String meanSecond = lit.getMeaningDefault().get(1);
if (!uniqMean.containsKey(meanSecond)) { if (!uniqMean.containsKey(meanSecond)) {
uniqMean.put(meanSecond, lit); uniqMean.put(meanSecond, lit);
meanUse2++; meanUse2++;
continue; continue;
} }
} }
String meanFirst = lit.meaning.get(0); String meanFirst = lit.getMeaningDefault().get(0);
if (!uniqMean.containsKey(meanFirst)) { if (!uniqMean.containsKey(meanFirst)) {
uniqMean.put(meanFirst, lit); uniqMean.put(meanFirst, lit);
meanUse1++; meanUse1++;